simran-t committed on 3 days ago

Commit

68d34af

•

1 Parent(s): 64900ad

Add new SentenceTransformer model

Browse files

Files changed (20) hide show

1_Pooling/config.json +10 -0
README.md +586 -0
config.json +103 -0
config_sentence_transformers.json +10 -0
configuration_nvembed.py +90 -0
model-00001-of-00007.safetensors +3 -0
model-00002-of-00007.safetensors +3 -0
model-00003-of-00007.safetensors +3 -0
model-00004-of-00007.safetensors +3 -0
model-00005-of-00007.safetensors +3 -0
model-00006-of-00007.safetensors +3 -0
model-00007-of-00007.safetensors +3 -0
model.safetensors.index.json +311 -0
modeling_nvembed.py +441 -0
modules.json +20 -0
sentence_bert_config.json +4 -0
special_tokens_map.json +30 -0
tokenizer.json +0 -0
tokenizer.model +3 -0
tokenizer_config.json +50 -0

1_Pooling/config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "word_embedding_dimension": 4096,
+  "pooling_mode_cls_token": false,
+  "pooling_mode_mean_tokens": true,
+  "pooling_mode_max_tokens": false,
+  "pooling_mode_mean_sqrt_len_tokens": false,
+  "pooling_mode_weightedmean_tokens": false,
+  "pooling_mode_lasttoken": false,
+  "include_prompt": false
+}

README.md ADDED Viewed

	@@ -0,0 +1,586 @@

+---
+tags:
+- sentence-transformers
+- sentence-similarity
+- feature-extraction
+- generated_from_trainer
+- dataset_size:16186
+- loss:MultipleNegativesRankingLoss
+base_model: nvidia/NV-Embed-v2
+widget:
+- source_sentence: 'Instruct: Given a question, retrieve passages that answer the
+    question. Query: what is the numeric dose of the Pembrolizumab Regimen?'
+  sentences:
+  - "Source: Radiology. Date: 2019-11-06. Context: 11/06/2019 1:03:20 PM  -0500496d70726f7665204865616c7468\
+    \    PAGE 2 OF 3\n    ________ ________ ________\n___ _____ ___ _____ _____, __\
+    \ _____-____\nIMAGING SERVICES\nPatient Name:    Exam Date/Time:    Phone _: \
+    \   MRN:\nYoung, _______ _______    11/06/2019 11:50 AM    ___-___-____    ______\n\
+    DOB:    Se    Account _:\n11/3/1939    Female    _________\nPt Class:    Accession\
+    \ _:    Performing Department:\nOutpatient    _________    MRI - FMH\nPrimary\
+    \ Care Provider:    Ordering Provider:    Authorizing Provider:\n______, ____\
+    \ _    ______, _______ _    ______, _______ _\nLaterality:\n9    Final - MRI BRAIN\
+    \ W/WO CONT"
+  - 'Source: SOAP_Note. Date: 2022-01-30. Context: _12 TAB
+    Prov:   01/19/22
+    D: 01/23/22 1545 Patient stopped taking
+    Reported Medications
+    ONDANSETRON (ZOFRAN) 4 MG PO Q6H
+    Metoprolol Succinate (TOPROL XL) 50 MG PO DAILY
+    predniSONE 5 MG PO DAILY
+    TRAMETINIB DIMETHYL SULFOXIDE (MEKINIST) 2 MG PO DAILY
+    DABRAFENIB MESYLATE (TAFINLAR) 100 MG PO BID
+    LOSARTAN (COZAAR) 50 MG PO DAILY
+    MIRTAZAPINE (REMERON) 7.5 MG PO BEDTIME
+    MED LIST INFORMATION 1 EA - CANCEL AT DISCHARGE
+    Additional Medical History
+    PMH:
+    Stage 4 Melanoma Cancer
+    Additional Surgical History
+    '
+  - "Source: SOAP_Note. Date: 2024-02-17. Context: 60 mg-90 mg-500 mg) qd \n* Metoprolol\
+    \ Oral 24 hr Tab (Succinate) 25 mg tablet extended release 24 hr  \n Regimens:\n\
+    \ Pembrolizumab Q21D (Flat Dose) (Adjuvant Melanoma, RCC)\n Hydration IV and Electrolyte\
+    \ Replacement Supportive Care\n \n \n \n  Allergies\n "
+- source_sentence: 'Instruct: Given a question, retrieve passages that answer the
+    question. Query: how many Radiation Therapy fractions were administered?'
+  sentences:
+  - "Source: SOAP_Note. Date: 2024-10-03. Context: PET with large volume metastatic\
+    \ disease involving the bones, soft tissue, and lung parenchyma bilaterally.\n\
+    \ - Radiation therapy left shoulder, right SI joint, right femur completed 1/5/22.\n\
+    \ - Nivolumab and ipilimumab initiated 11/24/21. "
+  - 'Source: SOAP_Note. Date: 2019-08-21. Context: 4 weeks, Print on Rx., Instructions/Comments:
+    nivolumab. [Updated. _______ _. _____ 08/21/2019 13:56].
+    Cancer Regimens Nivolumab Q28D (Flat Dose, Adjuvant Melanoma): C2D1. [_______
+    _. _____ 08/21/2019 15:18].I.V. access: peripheral IV, Site: '
+  - "Source: SOAP_Note. Date: 2023-11-27. Context: per day, down from 1.5 ppd. He\
+    \ has been smoking for the past 40 years.\n He denies alcohol use.\n He worked\
+    \ for ____ ______ / _____ _____ _____ \n \n               FAMILY HISTORY:\n Mother,\
+    \ age 94, Merkle cell carcinoma in her 70s. Daughter, age 52, brain tumor.\n Father,\
+    \ deceased at age 66, heart disease.\n \n   REVIEW OF SYSTEMS:    A comprehensive\
+    \ (10+) review of systems was performed today and was negative unless noted above.\n\
+    \    \n   VITALS: Blood pressure: 128/79, Sitting, Regular, Pulse: 110, "
+- source_sentence: 'Instruct: Given a question, retrieve passages that answer the
+    question. Query: when did the Dabrafenib Regimen start?'
+  sentences:
+  - 'Source: SOAP_Note. Date: 2018-11-29. Context: Take 1 PO daily, Instructions:
+    Take at least 1 hour before or two hours after a meal. [______ ______ 12/26/2018
+    13:46].Dabrafenib mesylate, po solid: 75 mg Capsule Take 2 PO BID, Instructions:
+    Take whole, at least 1 hour before or two hours after a '
+  - "Source: Pathology. Date: 2021-06-22. Context: Referral:  SECONDARY AND UNSPECIFIED\
+    \ MALIGNANT NEOPLASM OF LYMPH\nNODE, UNSPECIFIED\nFX4\nResults    HEENT:     \n\
+    HEE    BRAF V600E\nNot Expressed\n1\n\n    M\n19    \n1.10 78\nH\n\n1\n*   A \
+    \   \nA\nI    \nIntended Use:\nStains were scored by a pathologist using "
+  - "Source: SOAP_Note. Date: 2024-09-16. Context:                               \
+    \     Mr. _____ is married and he lives with his wife in _____ _____, __.\n The\
+    \ patient has cut back to 5 cigarettes per day, down from 1.5 ppd. He has been\
+    \ smoking for the past 40 years.\n He denies alcohol use.\n He worked for Duke\
+    \ Energy / "
+- source_sentence: 'Instruct: Given a question, retrieve passages that answer the
+    question. Query: when was the Reexcision performed?'
+  sentences:
+  - "Source: SOAP_Note. Date: 2024-06-13. Context: scan showed cutaneous involvement\
+    \ in the skin and also right inguinal adenopathy. No evidence of distant metastases.\
+    \ Opdualag _1.\n \n  10/03/2023: The patient complains of vertigo and wants to\
+    \ delay her next treatment. We will add Dramamine.\n \n  "
+  - "Source: Pathology. Date: 2022-03-23. Context: MD    ______, _______\n________\
+    \ ____ _________ - _______ ____    DOB: 09/14/1959\n______    ____ __ ____ Rd\
+    \    Age: 62\n__        _____ ___    Sex:  Male\n___    _____, __ _____\n___-___-____\n\
+    \    8    Accession _:  ____-_____\nCollection  Date: 03/23/2022\nollection Date:\
+    \ 03/23/    MRN: _____\nReceived Date: 03/23/2022\nReported Date: 03/24/2022\n\
+    SKIN, MID FRONTAL SCALP, EXCISION -\nNO EVIDENCE OF MALIGNANCY, FINAL MARGINS\
+    \ FREE OF TUMOR.\nSEE COMMENT.\nComment: Portions of deep subcutaneous fat and\
+    \ fascia are seen, all free of malignancy.\n\n_______ _. ______, MD\n**Electronically\
+    \ Signed on 24 MAR 2022 12:03PM**    8\nCLINICAL DATA:\nMID FRONTAL SCALP - EXCISION"
+  - "Source: Genetic_Testing. Date: 2023-08-21. Context: and a    STERETCHING\nvariants\
+    \ including genes associated wi    08 in    7/31    \n18 comination repair deficiency\
+    \    * fusion    NTR2 on    \n11 (HR/HRD, microsatellite instability (MS    gain\
+    \    Eston\nare umr mutational surgen 3.        Kat    "
+- source_sentence: 'Instruct: Given a question, retrieve passages that answer the
+    question. Query: what is the total dose administered in the EBRT Intensity Modulated
+    Radiation Therapy?'
+  sentences:
+  - "Source: SOAP_Note. Date: 2022-10-10. Context: given. \n \n Interim History\n\
+    \ \n _____ was last seen on 09/16/2022, at which time he started adjuvant immunotherapy\
+    \ with Keytruda q21 days. Here today for follow up and labs prior to C2 of treatment.\
+    \ States he is overall feeling well. Tolerated the "
+  - "Source: SOAP_Note. Date: 2020-03-13. Context: MV electrons.\n \n FIELDS:\n The\
+    \ right orbital mass and right cervical lymph nodes were initially treated with\
+    \ a two arc IMRT plan. Arc 1: 11.4 x 21 cm. Gantry start and stop angles 178 degrees\
+    \ / 182 degrees. Arc 2: 16.4 x 13.0 cm. Gantry start "
+  - "Source: Radiology. Date: 2023-09-18. Context: : >60\n \n Contrast Type: OMNI\
+    \ 350\n   Volume: 80ML\n \n Lot_: ________\n \n Exp. date: 05/26 \n Study Completed:\
+    \ CT CHEST W\n \n Reading Group:BCH \n  \n   Prior Studies for Comparison: 06/14/23\
+    \ CT CHEST W RMCC  \n \n ________ ______\n  "
+pipeline_tag: sentence-similarity
+library_name: sentence-transformers
+metrics:
+- cosine_accuracy@1
+- cosine_accuracy@3
+- cosine_accuracy@5
+- cosine_accuracy@10
+- cosine_precision@1
+- cosine_precision@3
+- cosine_precision@5
+- cosine_precision@10
+- cosine_recall@1
+- cosine_recall@3
+- cosine_recall@5
+- cosine_recall@10
+- cosine_ndcg@10
+- cosine_mrr@10
+- cosine_map@100
+model-index:
+- name: SentenceTransformer based on nvidia/NV-Embed-v2
+  results:
+  - task:
+      type: patient-qa
+      name: Patient QA
+    dataset:
+      name: ontada test
+      type: ontada-test
+    metrics:
+    - type: cosine_accuracy@1
+      value: 0.6856459330143541
+      name: Cosine Accuracy@1
+    - type: cosine_accuracy@3
+      value: 0.9531100478468899
+      name: Cosine Accuracy@3
+    - type: cosine_accuracy@5
+      value: 0.990909090909091
+      name: Cosine Accuracy@5
+    - type: cosine_accuracy@10
+      value: 1.0
+      name: Cosine Accuracy@10
+    - type: cosine_precision@1
+      value: 0.6856459330143541
+      name: Cosine Precision@1
+    - type: cosine_precision@3
+      value: 0.5208931419457735
+      name: Cosine Precision@3
+    - type: cosine_precision@5
+      value: 0.39693779904306226
+      name: Cosine Precision@5
+    - type: cosine_precision@10
+      value: 0.22511961722488041
+      name: Cosine Precision@10
+    - type: cosine_recall@1
+      value: 0.4202789169894433
+      name: Cosine Recall@1
+    - type: cosine_recall@3
+      value: 0.8154078377762588
+      name: Cosine Recall@3
+    - type: cosine_recall@5
+      value: 0.9453700539226855
+      name: Cosine Recall@5
+    - type: cosine_recall@10
+      value: 1.0046297562087037
+      name: Cosine Recall@10
+    - type: cosine_ndcg@10
+      value: 0.8649347118737546
+      name: Cosine Ndcg@10
+    - type: cosine_mrr@10
+      value: 0.8190546441862219
+      name: Cosine Mrr@10
+    - type: cosine_map@100
+      value: 0.804978870109979
+      name: Cosine Map@100
+---
+# SentenceTransformer based on nvidia/NV-Embed-v2
+This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [nvidia/NV-Embed-v2](https://huggingface.co/nvidia/NV-Embed-v2). It maps sentences & paragraphs to a 4096-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
+## Model Details
+### Model Description
+- **Model Type:** Sentence Transformer
+- **Base model:** [nvidia/NV-Embed-v2](https://huggingface.co/nvidia/NV-Embed-v2) <!-- at revision 7604d305b621f14095a1aa23d351674c2859553a -->
+- **Maximum Sequence Length:** 1024 tokens
+- **Output Dimensionality:** 4096 dimensions
+- **Similarity Function:** Cosine Similarity
+<!-- - **Training Dataset:** Unknown -->
+<!-- - **Language:** Unknown -->
+<!-- - **License:** Unknown -->
+### Model Sources
+- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
+- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
+- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
+### Full Model Architecture
+```
+SentenceTransformer(
+  (0): Transformer({'max_seq_length': 1024, 'do_lower_case': False}) with Transformer model: NVEmbedModel
+  (1): Pooling({'word_embedding_dimension': 4096, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': False})
+  (2): Normalize()
+)
+```
+## Usage
+### Direct Usage (Sentence Transformers)
+First install the Sentence Transformers library:
+```bash
+pip install -U sentence-transformers
+```
+Then you can load this model and run inference.
+```python
+from sentence_transformers import SentenceTransformer
+# Download from the 🤗 Hub
+model = SentenceTransformer("MendelAI/nv-embed-v2-ontada-twab-peft")
+# Run inference
+sentences = [
+    'Instruct: Given a question, retrieve passages that answer the question. Query: what is the total dose administered in the EBRT Intensity Modulated Radiation Therapy?',
+    'Source: SOAP_Note. Date: 2020-03-13. Context: MV electrons.\n \n FIELDS:\n The right orbital mass and right cervical lymph nodes were initially treated with a two arc IMRT plan. Arc 1: 11.4 x 21 cm. Gantry start and stop angles 178 degrees / 182 degrees. Arc 2: 16.4 x 13.0 cm. Gantry start ',
+    'Source: Radiology. Date: 2023-09-18. Context: : >60\n \n Contrast Type: OMNI 350\n   Volume: 80ML\n \n Lot_: ________\n \n Exp. date: 05/26 \n Study Completed: CT CHEST W\n \n Reading Group:BCH \n  \n   Prior Studies for Comparison: 06/14/23 CT CHEST W RMCC  \n \n ________ ______\n  ',
+]
+embeddings = model.encode(sentences)
+print(embeddings.shape)
+# [3, 4096]
+# Get the similarity scores for the embeddings
+similarities = model.similarity(embeddings, embeddings)
+print(similarities.shape)
+# [3, 3]
+```
+<!--
+### Direct Usage (Transformers)
+<details><summary>Click to see the direct usage in Transformers</summary>
+</details>
+-->
+<!--
+### Downstream Usage (Sentence Transformers)
+You can finetune this model on your own dataset.
+<details><summary>Click to expand</summary>
+</details>
+-->
+<!--
+### Out-of-Scope Use
+*List how the model may foreseeably be misused and address what users ought not to do with the model.*
+-->
+## Evaluation
+### Metrics
+#### Patient QA
+* Dataset: `ontada-test`
+* Evaluated with [<code>PatientQAEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.PatientQAEvaluator)
+| Metric              | Value      |
+|:--------------------|:-----------|
+| cosine_accuracy@1   | 0.6856     |
+| cosine_accuracy@3   | 0.9531     |
+| cosine_accuracy@5   | 0.9909     |
+| cosine_accuracy@10  | 1.0        |
+| cosine_precision@1  | 0.6856     |
+| cosine_precision@3  | 0.5209     |
+| cosine_precision@5  | 0.3969     |
+| cosine_precision@10 | 0.2251     |
+| cosine_recall@1     | 0.4203     |
+| cosine_recall@3     | 0.8154     |
+| cosine_recall@5     | 0.9454     |
+| cosine_recall@10    | 1.0046     |
+| **cosine_ndcg@10**  | **0.8649** |
+| cosine_mrr@10       | 0.8191     |
+| cosine_map@100      | 0.805      |
+<!--
+## Bias, Risks and Limitations
+*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
+-->
+<!--
+### Recommendations
+*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
+-->
+## Training Details
+### Training Dataset
+#### Unnamed Dataset
+* Size: 16,186 training samples
+* Columns: <code>question</code> and <code>context</code>
+* Approximate statistics based on the first 1000 samples:
+  |         | question                                                                           | context                                                                              |
+  |:--------|:-----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
+  | type    | string                                                                             | string                                                                               |
+  | details | <ul><li>min: 25 tokens</li><li>mean: 30.78 tokens</li><li>max: 39 tokens</li></ul> | <ul><li>min: 74 tokens</li><li>mean: 177.84 tokens</li><li>max: 398 tokens</li></ul> |
+* Samples:
+  | question                                                                                                                                  | context                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 |
+  |:------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+  | <code>Instruct: Given a question, retrieve passages that answer the question. Query: what was the abnormality identified for BRAF?</code> | <code>Source: Genetic_Testing. Date: 2022-10-07. Context: Mutational    Seq    DNA-Tumor    Low, 6 mt/Mb    NF1<br>Seq    DNA-Tumor    Mutation Not Detected<br>T    In Not D<br>ARID2    Seq    DNA-Tumor    Mutation Not Detected    CNA-Seq     DNA-Tumor    Deletion Not Detected<br>        PTEN<br>Seq    RNA-Tumor    Fusion Not Detected    Seq    DNA-Tumor    Mutation Not Detected<br>BRAF        <br>        Amplification Not    _<br>CNA-Seq    DNA-Tumor    Detected    RAC1    Seq    DNA-Tumor    Mutation Not Detected<br>The selection of any, all, or none of the matched therapies </code>         |
+  | <code>Instruct: Given a question, retrieve passages that answer the question. Query: what was the abnormality identified for BRAF?</code> | <code>Source: Genetic_Testing. Date: 2021-06-04. Context: characteristics have been determined by _____ ___________<br>_______ _________ ___ ____ __________. It has not been<br>cleared or approved by FDA. This assay has been validated<br>pursuant to the CLIA regulations and is used for clinical<br>purposes.<br>BRAF MUTATION ANALYSIS        E<br>SOURCE:    LYMPH NODE<br>PARAFFIN BLOCK NUMBER:    ____-_______ A4<br>BRAF MUTATION ANALYSIS    NOT DETECTED    NOT DETECTED<br>This result was reviewed and interpreted by _. ____, M.D.<br>Based on Sanger sequencing analysis, no mutations </code>       |
+  | <code>Instruct: Given a question, retrieve passages that answer the question. Query: what was the abnormality identified for BRAF?</code> | <code>Source: Pathology. Date: 2019-12-12. Context: Receive Date:    12/12/2019<br>___ _:    ________________    Accession Date:    12/12/2019<br>Copy To:    Report Date:    12/19/2019 18:16<br>***SUPPLEMENTAL REPORT***<br>(previous report date: 12/19/2019)<br>BRAF SNAPSHOT<br>Results:<br>POSITIVE<br>Interpretation:<br>A BRAF mutation was detected in the provided specimen.<br>FDA has approved TKI inhibitor vemurafenib and dabrafenib for the first-line treatment of patients with<br>unresectable or metastatic melanoma whose tumors have a BRAF V600E mutation, and trametinib for tumors<br></code> |
+* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
+  ```json
+  {
+      "scale": 20.0,
+      "similarity_fct": "cos_sim"
+  }
+  ```
+### Training Hyperparameters
+#### Non-Default Hyperparameters
+- `eval_strategy`: steps
+- `per_device_train_batch_size`: 4
+- `per_device_eval_batch_size`: 64
+- `learning_rate`: 2e-05
+- `num_train_epochs`: 1
+- `warmup_ratio`: 0.1
+- `seed`: 6789
+- `bf16`: True
+- `prompts`: {'question': 'Instruct: Given a question, retrieve passages that answer the question. Query: '}
+- `batch_sampler`: no_duplicates
+#### All Hyperparameters
+<details><summary>Click to expand</summary>
+- `overwrite_output_dir`: False
+- `do_predict`: False
+- `eval_strategy`: steps
+- `prediction_loss_only`: True
+- `per_device_train_batch_size`: 4
+- `per_device_eval_batch_size`: 64
+- `per_gpu_train_batch_size`: None
+- `per_gpu_eval_batch_size`: None
+- `gradient_accumulation_steps`: 1
+- `eval_accumulation_steps`: None
+- `torch_empty_cache_steps`: None
+- `learning_rate`: 2e-05
+- `weight_decay`: 0.0
+- `adam_beta1`: 0.9
+- `adam_beta2`: 0.999
+- `adam_epsilon`: 1e-08
+- `max_grad_norm`: 1.0
+- `num_train_epochs`: 1
+- `max_steps`: -1
+- `lr_scheduler_type`: linear
+- `lr_scheduler_kwargs`: {}
+- `warmup_ratio`: 0.1
+- `warmup_steps`: 0
+- `log_level`: passive
+- `log_level_replica`: warning
+- `log_on_each_node`: True
+- `logging_nan_inf_filter`: True
+- `save_safetensors`: True
+- `save_on_each_node`: False
+- `save_only_model`: False
+- `restore_callback_states_from_checkpoint`: False
+- `no_cuda`: False
+- `use_cpu`: False
+- `use_mps_device`: False
+- `seed`: 6789
+- `data_seed`: None
+- `jit_mode_eval`: False
+- `use_ipex`: False
+- `bf16`: True
+- `fp16`: False
+- `fp16_opt_level`: O1
+- `half_precision_backend`: auto
+- `bf16_full_eval`: False
+- `fp16_full_eval`: False
+- `tf32`: None
+- `local_rank`: 0
+- `ddp_backend`: None
+- `tpu_num_cores`: None
+- `tpu_metrics_debug`: False
+- `debug`: []
+- `dataloader_drop_last`: False
+- `dataloader_num_workers`: 0
+- `dataloader_prefetch_factor`: None
+- `past_index`: -1
+- `disable_tqdm`: False
+- `remove_unused_columns`: True
+- `label_names`: None
+- `load_best_model_at_end`: False
+- `ignore_data_skip`: False
+- `fsdp`: []
+- `fsdp_min_num_params`: 0
+- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
+- `fsdp_transformer_layer_cls_to_wrap`: None
+- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
+- `deepspeed`: None
+- `label_smoothing_factor`: 0.0
+- `optim`: adamw_torch
+- `optim_args`: None
+- `adafactor`: False
+- `group_by_length`: False
+- `length_column_name`: length
+- `ddp_find_unused_parameters`: None
+- `ddp_bucket_cap_mb`: None
+- `ddp_broadcast_buffers`: False
+- `dataloader_pin_memory`: True
+- `dataloader_persistent_workers`: False
+- `skip_memory_metrics`: True
+- `use_legacy_prediction_loop`: False
+- `push_to_hub`: False
+- `resume_from_checkpoint`: None
+- `hub_model_id`: None
+- `hub_strategy`: every_save
+- `hub_private_repo`: False
+- `hub_always_push`: False
+- `gradient_checkpointing`: False
+- `gradient_checkpointing_kwargs`: None
+- `include_inputs_for_metrics`: False
+- `include_for_metrics`: []
+- `eval_do_concat_batches`: True
+- `fp16_backend`: auto
+- `push_to_hub_model_id`: None
+- `push_to_hub_organization`: None
+- `mp_parameters`:
+- `auto_find_batch_size`: False
+- `full_determinism`: False
+- `torchdynamo`: None
+- `ray_scope`: last
+- `ddp_timeout`: 1800
+- `torch_compile`: False
+- `torch_compile_backend`: None
+- `torch_compile_mode`: None
+- `dispatch_batches`: None
+- `split_batches`: None
+- `include_tokens_per_second`: False
+- `include_num_input_tokens_seen`: False
+- `neftune_noise_alpha`: None
+- `optim_target_modules`: None
+- `batch_eval_metrics`: False
+- `eval_on_start`: False
+- `use_liger_kernel`: False
+- `eval_use_gather_object`: False
+- `prompts`: {'question': 'Instruct: Given a question, retrieve passages that answer the question. Query: '}
+- `batch_sampler`: no_duplicates
+- `multi_dataset_batch_sampler`: proportional
+</details>
+### Training Logs
+| Epoch  | Step | Training Loss | ontada-test_cosine_ndcg@10 |
+|:------:|:----:|:-------------:|:--------------------------:|
+| 0      | 0    | -             | 0.8431                     |
+| 0.0002 | 1    | 1.5826        | -                          |
+| 0.0371 | 150  | 0.4123        | -                          |
+| 0.0741 | 300  | 0.3077        | -                          |
+| 0.1112 | 450  | 0.2184        | -                          |
+| 0.1483 | 600  | 0.3291        | -                          |
+| 0.1853 | 750  | 0.2343        | -                          |
+| 0.2224 | 900  | 0.2506        | -                          |
+| 0.2471 | 1000 | -             | 0.8077                     |
+| 0.2595 | 1050 | 0.1294        | -                          |
+| 0.2965 | 1200 | 0.0158        | -                          |
+| 0.3336 | 1350 | 0.0189        | -                          |
+| 0.3706 | 1500 | 0.0363        | -                          |
+| 0.4077 | 1650 | 0.0208        | -                          |
+| 0.4448 | 1800 | 0.475         | -                          |
+| 0.4818 | 1950 | 0.6183        | -                          |
+| 0.4942 | 2000 | -             | 0.8482                     |
+| 0.5189 | 2100 | 0.4779        | -                          |
+| 0.5560 | 2250 | 0.4194        | -                          |
+| 0.5930 | 2400 | 0.8376        | -                          |
+| 0.6301 | 2550 | 0.4249        | -                          |
+| 0.6672 | 2700 | 0.9336        | -                          |
+| 0.7042 | 2850 | 0.5351        | -                          |
+| 0.7413 | 3000 | 1.0253        | 0.8551                     |
+| 0.7784 | 3150 | 0.3961        | -                          |
+| 0.8154 | 3300 | 0.3881        | -                          |
+| 0.8525 | 3450 | 0.5573        | -                          |
+| 0.8895 | 3600 | 1.222         | -                          |
+| 0.9266 | 3750 | 0.3032        | -                          |
+| 0.9637 | 3900 | 0.3142        | -                          |
+| 0.9884 | 4000 | -             | 0.8645                     |
+| 1.0    | 4047 | -             | 0.8649                     |
+### Framework Versions
+- Python: 3.11.10
+- Sentence Transformers: 3.4.0.dev0
+- Transformers: 4.46.0
+- PyTorch: 2.3.1+cu121
+- Accelerate: 1.0.1
+- Datasets: 3.0.1
+- Tokenizers: 0.20.1
+## Citation
+### BibTeX
+#### Sentence Transformers
+```bibtex
+@inproceedings{reimers-2019-sentence-bert,
+    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+    author = "Reimers, Nils and Gurevych, Iryna",
+    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+    month = "11",
+    year = "2019",
+    publisher = "Association for Computational Linguistics",
+    url = "https://arxiv.org/abs/1908.10084",
+}
+```
+#### MultipleNegativesRankingLoss
+```bibtex
+@misc{henderson2017efficient,
+    title={Efficient Natural Language Response Suggestion for Smart Reply},
+    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
+    year={2017},
+    eprint={1705.00652},
+    archivePrefix={arXiv},
+    primaryClass={cs.CL}
+}
+```
+<!--
+## Glossary
+*Clearly define terms in order to be accessible across audiences.*
+-->
+<!--
+## Model Card Authors
+*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
+-->
+<!--
+## Model Card Contact
+*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
+-->

config.json ADDED Viewed

	@@ -0,0 +1,103 @@

+{
+  "_name_or_path": "/workspace/data/june/sentence-transformers/outputs/2024-11-18/19-28-23/models/nv-embed-v2-ontada-twab-peft/final/",
+  "add_eos": true,
+  "add_pad_token": true,
+  "architectures": [
+    "NVEmbedModel"
+  ],
+  "auto_map": {
+    "AutoConfig": "configuration_nvembed.NVEmbedConfig",
+    "AutoModel": "modeling_nvembed.NVEmbedModel"
+  },
+  "hidden_size": 4096,
+  "is_mask_instruction": true,
+  "latent_attention_config": {
+    "model_type": "latent_attention"
+  },
+  "mask_type": "b",
+  "model_type": "nvembed",
+  "padding_side": "right",
+  "text_config": {
+    "_attn_implementation_autoset": false,
+    "_name_or_path": "nvidia/NV-Embed-v2",
+    "add_cross_attention": false,
+    "architectures": [
+      "MistralModel"
+    ],
+    "attention_dropout": 0.0,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": 1,
+    "chunk_size_feed_forward": 0,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": 2,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "head_dim": 128,
+    "hidden_act": "silu",
+    "hidden_size": 4096,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "initializer_range": 0.02,
+    "intermediate_size": 14336,
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "max_position_embeddings": 32768,
+    "min_length": 0,
+    "model_type": "bidir_mistral",
+    "no_repeat_ngram_size": 0,
+    "num_attention_heads": 32,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_hidden_layers": 32,
+    "num_key_value_heads": 8,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": null,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "rms_norm_eps": 1e-05,
+    "rope_theta": 10000.0,
+    "sep_token_id": null,
+    "sliding_window": 4096,
+    "suppress_tokens": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": false,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": "float32",
+    "torchscript": false,
+    "typical_p": 1.0,
+    "use_bfloat16": false,
+    "use_cache": true,
+    "vocab_size": 32000
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.46.0"
+}

config_sentence_transformers.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "__version__": {
+    "sentence_transformers": "3.4.0.dev0",
+    "transformers": "4.46.0",
+    "pytorch": "2.3.1+cu121"
+  },
+  "prompts": {},
+  "default_prompt_name": null,
+  "similarity_fn_name": "cosine"
+}

configuration_nvembed.py ADDED Viewed

	@@ -0,0 +1,90 @@

+from typing import Literal
+from transformers import AutoConfig
+from transformers.configuration_utils import PretrainedConfig
+from transformers.models.auto import CONFIG_MAPPING
+from transformers.models.mistral import MistralConfig
+NVEMBED_TYPE = "nvembed"  # same string as NVEmbedConfig.model_type below
+LATENT_ATTENTION_TYPE = "latent_attention"  # model_type of LatentAttentionConfig; also the CONFIG_MAPPING key NVEmbedConfig looks up
+BIDIR_MISTRAL_TYPE = "bidir_mistral"  # matches text_config.model_type in this repo's config.json; its use is not visible in this chunk
+class NVEmbedConfig(PretrainedConfig):  # Top-level config: holds a latent-attention sub-config, a text sub-config, and tokenization/masking flags
+    model_type = "nvembed"
+    is_composition = False
+    def __init__(
+        self,
+        latent_attention_config=None,  # dict, already-built config object, or None (None -> default-constructed sub-config)
+        text_config=None,  # dict, already-built config object, or None (None is stored as-is)
+        padding_side: Literal["right", "left"]="right",
+        add_pad_token: bool=True,
+        is_mask_instruction: bool = True,
+        add_eos: bool=True,
+        mask_type: str="b",
+        **kwargs,  # forwarded unchanged to PretrainedConfig.__init__; "hidden_size" is also read here
+    ):
+        if isinstance(latent_attention_config, dict):
+            latent_attention_config["model_type"] = (  # NOTE(review): writes the default back into the caller's dict (in-place mutation)
+                latent_attention_config["model_type"] if "model_type" in latent_attention_config else LATENT_ATTENTION_TYPE
+            )
+            latent_attention_config = CONFIG_MAPPING[latent_attention_config["model_type"]](**latent_attention_config)  # build the config class registered under that model_type
+        elif latent_attention_config is None:
+            latent_attention_config = CONFIG_MAPPING[LATENT_ATTENTION_TYPE]()  # assumes "latent_attention" is registered in CONFIG_MAPPING elsewhere -- TODO confirm
+        self.latent_attention_config = latent_attention_config
+        if isinstance(text_config, dict):
+            text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "llama"  # falls back to "llama" when the dict names no model_type; also mutates the caller's dict
+            text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config)  # e.g. "bidir_mistral" in this repo's config.json; presumably registered elsewhere -- verify
+        elif text_config is None:
+            text_config = None  # no-op branch: a missing text_config simply stays None
+        self.text_config = text_config
+        self.padding_side = padding_side
+        self.is_mask_instruction = is_mask_instruction
+        self.add_pad_token = add_pad_token
+        self.add_eos = add_eos
+        self.mask_type = mask_type
+        if "hidden_size" in kwargs:
+            self.hidden_size = kwargs["hidden_size"]  # key is not popped, so it is still passed on to super().__init__ below
+        else:
+            self.hidden_size = 4096  # default when the caller supplies no hidden_size
+        super().__init__(**kwargs)  # runs last, after all attributes above have been assigned
+class LatentAttentionConfig(PretrainedConfig):
+    model_type = LATENT_ATTENTION_TYPE
+    is_composition = False
+    _name_or_path = "latent_attention"
+    def __init__(
+        self,
+        num_latents_value: int=512,
+        num_cross_heads: int=8,
+        output_normalize: bool=True,
+        hidden_dim: int=4096,
+        latent_dim: int=4096,
+        cross_dim_head: int=4096,
+        **kwargs,
+    ):
+        self.num_latents_value = num_latents_value
+        self.num_cross_heads = num_cross_heads
+        self.output_normalize = output_normalize
+        self.hidden_dim = hidden_dim
+        self.latent_dim = latent_dim
+        self.cross_dim_head = cross_dim_head
+class BidirectionalMistralConfig(MistralConfig):  # MistralConfig subclass that only overrides the two class attributes below
+    model_type = BIDIR_MISTRAL_TYPE  # distinct model_type so this config can be registered separately from stock Mistral
+    keys_to_ignore_at_inference = ["past_key_values"]  # NOTE(review): consumed by transformers, not by this file; see PretrainedConfig docs
+AutoConfig.register(NVEMBED_TYPE, NVEmbedConfig)  # import-time side effect: associate each model_type string with its config class
+AutoConfig.register(LATENT_ATTENTION_TYPE, LatentAttentionConfig)  # NVEmbedConfig looks this key up in CONFIG_MAPPING; presumably populated by this call, confirm against transformers
+AutoConfig.register(BIDIR_MISTRAL_TYPE, BidirectionalMistralConfig)
+NVEmbedConfig.register_for_auto_class()  # NOTE(review): marks the class as custom code for the auto-class machinery; semantics live in transformers
+LatentAttentionConfig.register_for_auto_class()
+BidirectionalMistralConfig.register_for_auto_class()

model-00001-of-00007.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9da5d3a0f4722c5aaec4251748f9c531c07da032cf9ccac44af75e76862b1005
+size 4995698456

model-00002-of-00007.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bb80eefa9ae938158283d57b41011cfe7dedad39d28eb5b3d5757e6fb662185a
+size 4999813600

model-00003-of-00007.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:86a3be3f0deb8e186c216b75a1a31cb3547c4007e9488aaba139e69b0c687573
+size 4999813624

model-00004-of-00007.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:60dfb9d521133071297fe06f0436fd4abe5da4f8c8e545dcad083579ff957944
+size 4832007968

model-00005-of-00007.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5b22269d0eb46b0e767871becd03a0d72eeb5d577508cb93699c4c513c5919ab
+size 4999813656

model-00006-of-00007.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:01e3c07a33f15646debcffcfb5d8fead6f9d134677e7d49623bbb8c08b7a8a56
+size 4999813656

model-00007-of-00007.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:335f0defa0fe29f91969a1b8cd7cd15c5fe68b0129f99342f5d9d7096b6c06b8
+size 1577142096

model.safetensors.index.json ADDED Viewed

	@@ -0,0 +1,311 @@

+{
+  "metadata": {
+    "total_size": 31404064768
+  },
+  "weight_map": {
+    "embedding_model.embed_tokens.weight": "model-00001-of-00007.safetensors",
+    "embedding_model.layers.0.input_layernorm.weight": "model-00001-of-00007.safetensors",
+    "embedding_model.layers.0.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
+    "embedding_model.layers.0.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
+    "embedding_model.layers.0.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
+    "embedding_model.layers.0.post_attention_layernorm.weight": "model-00001-of-00007.safetensors",
+    "embedding_model.layers.0.self_attn.k_proj.weight": "model-00001-of-00007.safetensors",
+    "embedding_model.layers.0.self_attn.o_proj.weight": "model-00001-of-00007.safetensors",
+    "embedding_model.layers.0.self_attn.q_proj.weight": "model-00001-of-00007.safetensors",
+    "embedding_model.layers.0.self_attn.v_proj.weight": "model-00001-of-00007.safetensors",
+    "embedding_model.layers.1.input_layernorm.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.1.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.1.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
+    "embedding_model.layers.1.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
+    "embedding_model.layers.1.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.1.self_attn.k_proj.weight": "model-00001-of-00007.safetensors",
+    "embedding_model.layers.1.self_attn.o_proj.weight": "model-00001-of-00007.safetensors",
+    "embedding_model.layers.1.self_attn.q_proj.weight": "model-00001-of-00007.safetensors",
+    "embedding_model.layers.1.self_attn.v_proj.weight": "model-00001-of-00007.safetensors",
+    "embedding_model.layers.10.input_layernorm.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.10.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.10.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.10.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.10.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.10.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.10.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.10.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.10.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.11.input_layernorm.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.11.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.11.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.11.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.11.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.11.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.11.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.11.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.11.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.12.input_layernorm.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.12.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.12.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.12.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.12.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.12.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.12.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.12.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.12.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.13.input_layernorm.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.13.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.13.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.13.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.13.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.13.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.13.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.13.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.13.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.14.input_layernorm.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.14.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.14.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.14.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.14.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.14.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.14.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.14.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.14.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.15.input_layernorm.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.15.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.15.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.15.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.15.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.15.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.15.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.15.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.15.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.16.input_layernorm.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.16.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.16.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.16.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.16.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.16.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.16.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.16.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.16.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.17.input_layernorm.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.17.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.17.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.17.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.17.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.17.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.17.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.17.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.17.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.18.input_layernorm.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.18.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.18.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.18.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.18.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.18.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.18.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.18.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.18.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
+    "embedding_model.layers.19.input_layernorm.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.19.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.19.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.19.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.19.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.19.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.19.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.19.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.19.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.2.input_layernorm.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.2.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.2.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.2.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.2.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.2.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.2.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.2.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.2.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.20.input_layernorm.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.20.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.20.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.20.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.20.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.20.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.20.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.20.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.20.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.21.input_layernorm.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.21.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.21.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.21.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.21.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.21.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.21.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.21.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.21.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.22.input_layernorm.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.22.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.22.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.22.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.22.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.22.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.22.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.22.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.22.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.23.input_layernorm.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.23.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.23.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.23.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.23.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.23.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.23.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.23.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.23.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.24.input_layernorm.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.24.mlp.down_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.24.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.24.mlp.up_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.24.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.24.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.24.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.24.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.24.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
+    "embedding_model.layers.25.input_layernorm.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.25.mlp.down_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.25.mlp.gate_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.25.mlp.up_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.25.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.25.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.25.self_attn.o_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.25.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.25.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.26.input_layernorm.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.26.mlp.down_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.26.mlp.gate_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.26.mlp.up_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.26.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.26.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.26.self_attn.o_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.26.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.26.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.27.input_layernorm.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.27.mlp.down_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.27.mlp.gate_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.27.mlp.up_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.27.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.27.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.27.self_attn.o_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.27.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.27.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.28.input_layernorm.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.28.mlp.down_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.28.mlp.gate_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.28.mlp.up_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.28.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.28.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.28.self_attn.o_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.28.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.28.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.29.input_layernorm.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.29.mlp.down_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.29.mlp.gate_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.29.mlp.up_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.29.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.29.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.29.self_attn.o_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.29.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.29.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.3.input_layernorm.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.3.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.3.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.3.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.3.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.3.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.3.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.3.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.3.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.30.input_layernorm.weight": "model-00007-of-00007.safetensors",
+    "embedding_model.layers.30.mlp.down_proj.weight": "model-00007-of-00007.safetensors",
+    "embedding_model.layers.30.mlp.gate_proj.weight": "model-00007-of-00007.safetensors",
+    "embedding_model.layers.30.mlp.up_proj.weight": "model-00007-of-00007.safetensors",
+    "embedding_model.layers.30.post_attention_layernorm.weight": "model-00007-of-00007.safetensors",
+    "embedding_model.layers.30.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.30.self_attn.o_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.30.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.30.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
+    "embedding_model.layers.31.input_layernorm.weight": "model-00007-of-00007.safetensors",
+    "embedding_model.layers.31.mlp.down_proj.weight": "model-00007-of-00007.safetensors",
+    "embedding_model.layers.31.mlp.gate_proj.weight": "model-00007-of-00007.safetensors",
+    "embedding_model.layers.31.mlp.up_proj.weight": "model-00007-of-00007.safetensors",
+    "embedding_model.layers.31.post_attention_layernorm.weight": "model-00007-of-00007.safetensors",
+    "embedding_model.layers.31.self_attn.k_proj.weight": "model-00007-of-00007.safetensors",
+    "embedding_model.layers.31.self_attn.o_proj.weight": "model-00007-of-00007.safetensors",
+    "embedding_model.layers.31.self_attn.q_proj.weight": "model-00007-of-00007.safetensors",
+    "embedding_model.layers.31.self_attn.v_proj.weight": "model-00007-of-00007.safetensors",
+    "embedding_model.layers.4.input_layernorm.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.4.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.4.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.4.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.4.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.4.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.4.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.4.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.4.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.5.input_layernorm.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.5.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.5.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.5.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.5.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.5.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.5.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.5.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.5.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.6.input_layernorm.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.6.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.6.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.6.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.6.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.6.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.6.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.6.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.6.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.7.input_layernorm.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.7.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.7.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.7.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.7.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.7.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.7.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.7.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.7.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
+    "embedding_model.layers.8.input_layernorm.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.8.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.8.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.8.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.8.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.8.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.8.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.8.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.8.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.9.input_layernorm.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.9.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.9.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.9.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.9.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.9.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.9.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.9.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.layers.9.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
+    "embedding_model.norm.weight": "model-00007-of-00007.safetensors",
+    "latent_attention_model.cross_attend_blocks.0.fn.to_kv.weight": "model-00001-of-00007.safetensors",
+    "latent_attention_model.cross_attend_blocks.0.fn.to_out.weight": "model-00001-of-00007.safetensors",
+    "latent_attention_model.cross_attend_blocks.0.fn.to_q.weight": "model-00001-of-00007.safetensors",
+    "latent_attention_model.cross_attend_blocks.0.norm.bias": "model-00001-of-00007.safetensors",
+    "latent_attention_model.cross_attend_blocks.0.norm.weight": "model-00001-of-00007.safetensors",
+    "latent_attention_model.cross_attend_blocks.0.norm_context.bias": "model-00001-of-00007.safetensors",
+    "latent_attention_model.cross_attend_blocks.0.norm_context.weight": "model-00001-of-00007.safetensors",
+    "latent_attention_model.cross_attend_blocks.1.fn.net.0.bias": "model-00001-of-00007.safetensors",
+    "latent_attention_model.cross_attend_blocks.1.fn.net.0.weight": "model-00001-of-00007.safetensors",
+    "latent_attention_model.cross_attend_blocks.1.fn.net.2.bias": "model-00001-of-00007.safetensors",
+    "latent_attention_model.cross_attend_blocks.1.fn.net.2.weight": "model-00001-of-00007.safetensors",
+    "latent_attention_model.cross_attend_blocks.1.norm.bias": "model-00001-of-00007.safetensors",
+    "latent_attention_model.cross_attend_blocks.1.norm.weight": "model-00001-of-00007.safetensors",
+    "latent_attention_model.latents": "model-00001-of-00007.safetensors"
+  }
+}

modeling_nvembed.py ADDED Viewed

	@@ -0,0 +1,441 @@

+from typing import List, Union, Dict, Mapping, Optional, Tuple, TypedDict
+import torch
+import os
+import json
+import numpy as np
+from functools import partial
+from contextlib import nullcontext
+from transformers import AutoModel, PreTrainedTokenizerFast, BatchEncoding, DataCollatorWithPadding
+from transformers.modeling_utils import PreTrainedModel
+from transformers.models.auto import AutoTokenizer
+from transformers.models.mistral.modeling_mistral import MISTRAL_INPUTS_DOCSTRING
+from transformers.modeling_outputs import BaseModelOutputWithPast
+from transformers.modeling_attn_mask_utils import _prepare_4d_attention_mask, _prepare_4d_attention_mask_for_sdpa
+from transformers import MistralModel, MistralConfig
+from transformers.cache_utils import Cache, DynamicCache
+from transformers.utils import (
+    add_start_docstrings_to_model_forward,
+    logging,
+)
+from einops import rearrange, repeat
+from tqdm.auto import tqdm
+from datasets import Dataset
+from torch.utils.data import DataLoader
+from .configuration_nvembed import NVEmbedConfig, LatentAttentionConfig, BidirectionalMistralConfig
+logger = logging.get_logger(__name__)
+class NVEmbedFeatures(TypedDict):
+    input_dict: torch.Tensor
+    attention_mask: torch.Tensor
+    pool_mask: torch.Tensor
+class BidirectionalMistralModel(MistralModel):
+    """MistralModel subclass whose self-attention layers are flagged as non-causal.
+    The forward pass below builds its attention mask with the plain
+    `_prepare_4d_attention_mask*` helpers and then runs the inherited decoder
+    layers, final norm, and cache/hidden-state bookkeeping.
+    """
+    config_class = BidirectionalMistralConfig
+    def __init__(self, config: MistralConfig):
+        super().__init__(config)
+        # Mark every self-attention module as non-causal.
+        for layer in self.layers:
+            layer.self_attn.is_causal = False
+        # Pin the attention implementation to "eager". With this value the
+        # "flash_attention_2" and "sdpa" branches in forward() are skipped
+        # unless the attribute is reassigned after construction.
+        self._attn_implementation = "eager"
+    @add_start_docstrings_to_model_forward(MISTRAL_INPUTS_DOCSTRING)
+    def forward(
+        self,
+        input_ids: torch.LongTensor = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        past_key_values: Optional[List[torch.FloatTensor]] = None,
+        inputs_embeds: Optional[torch.FloatTensor] = None,
+        use_cache: Optional[bool] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, BaseModelOutputWithPast]:
+        # Any flag left as None falls back to the value stored on self.config.
+        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
+        output_hidden_states = (
+            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
+        )
+        use_cache = use_cache if use_cache is not None else self.config.use_cache
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+        # retrieve input_ids and inputs_embeds
+        # Exactly one of input_ids / inputs_embeds must be given; it fixes
+        # batch_size and seq_length for the rest of the pass.
+        if input_ids is not None and inputs_embeds is not None:
+            raise ValueError("You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time")
+        elif input_ids is not None:
+            batch_size, seq_length = input_ids.shape
+        elif inputs_embeds is not None:
+            batch_size, seq_length, _ = inputs_embeds.shape
+        else:
+            raise ValueError("You have to specify either decoder_input_ids or decoder_inputs_embeds")
+        # Caching is switched off (with a one-time warning) while training
+        # under gradient checkpointing.
+        if self.gradient_checkpointing and self.training:
+            if use_cache:
+                logger.warning_once(
+                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
+                )
+                use_cache = False
+        past_key_values_length = 0
+        if use_cache:
+            # Anything that is not already a Cache instance is wrapped in a
+            # DynamicCache; use_legacy_cache remembers to convert back on return.
+            use_legacy_cache = not isinstance(past_key_values, Cache)
+            if use_legacy_cache:
+                past_key_values = DynamicCache.from_legacy_cache(past_key_values)
+            past_key_values_length = past_key_values.get_usable_length(seq_length)
+        # Default positions continue from the cached length; explicit ones are
+        # reshaped to (-1, seq_length) and cast to long.
+        if position_ids is None:
+            device = input_ids.device if input_ids is not None else inputs_embeds.device
+            position_ids = torch.arange(
+                past_key_values_length, seq_length + past_key_values_length, dtype=torch.long, device=device
+            )
+            position_ids = position_ids.unsqueeze(0).view(-1, seq_length)
+        else:
+            position_ids = position_ids.view(-1, seq_length).long()
+        if inputs_embeds is None:
+            inputs_embeds = self.embed_tokens(input_ids)
+        # Flash-attention + cache only: reject batches whose last mask column is
+        # not all ones (i.e. right-padded batches).
+        if attention_mask is not None and self._attn_implementation == "flash_attention_2" and use_cache:
+            is_padding_right = attention_mask[:, -1].sum().item() != batch_size
+            if is_padding_right:
+                raise ValueError(
+                    "You are attempting to perform batched generation with padding_side='right'"
+                    " this may lead to unexpected behaviour for Flash Attention version of Mistral. Make sure to "
+                    " call `tokenizer.padding_side  = 'left'` before tokenizing the input. "
+                )
+        # Pick the mask representation expected by the attention implementation.
+        # NOTE(review): the `_prepare_4d_attention_mask*` helpers presumably expand
+        # the 2D padding mask without adding a causal triangle — confirm against
+        # the installed transformers version.
+        if self._attn_implementation == "flash_attention_2":
+            # 2d mask is passed through the layers
+            attention_mask = attention_mask if (attention_mask is not None and 0 in attention_mask) else None
+        elif self._attn_implementation == "sdpa" and not output_attentions:
+            # output_attentions=True can not be supported when using SDPA, and we fall back on
+            # the manual implementation that requires a 4D causal mask in all cases.
+            attention_mask = _prepare_4d_attention_mask_for_sdpa(
+                attention_mask, inputs_embeds.dtype
+            )
+        else:
+            # 4d mask is passed through the layers
+            attention_mask = _prepare_4d_attention_mask(
+                attention_mask, inputs_embeds.dtype,
+            )
+        hidden_states = inputs_embeds
+        # decoder layers
+        # The tuples below are only allocated when the matching output flag is set.
+        all_hidden_states = () if output_hidden_states else None
+        all_self_attns = () if output_attentions else None
+        next_decoder_cache = None
+        for decoder_layer in self.layers:
+            # Record the input of each layer (the final output is appended after the norm).
+            if output_hidden_states:
+                all_hidden_states += (hidden_states,)
+            if self.gradient_checkpointing and self.training:
+                # Checkpointed call passes the same arguments positionally.
+                layer_outputs = self._gradient_checkpointing_func(
+                    decoder_layer.__call__,
+                    hidden_states,
+                    attention_mask,
+                    position_ids,
+                    past_key_values,
+                    output_attentions,
+                    use_cache,
+                )
+            else:
+                layer_outputs = decoder_layer(
+                    hidden_states,
+                    attention_mask=attention_mask,
+                    position_ids=position_ids,
+                    past_key_value=past_key_values,
+                    output_attentions=output_attentions,
+                    use_cache=use_cache,
+                )
+            hidden_states = layer_outputs[0]
+            # The cache sits after the attention weights when those are returned.
+            if use_cache:
+                next_decoder_cache = layer_outputs[2 if output_attentions else 1]
+            if output_attentions:
+                all_self_attns += (layer_outputs[1],)
+        hidden_states = self.norm(hidden_states)
+        # add hidden states from the last decoder layer
+        if output_hidden_states:
+            all_hidden_states += (hidden_states,)
+        next_cache = None
+        if use_cache:
+            # Hand the cache back in the same format the caller supplied.
+            next_cache = next_decoder_cache.to_legacy_cache() if use_legacy_cache else next_decoder_cache
+        # Tuple output drops every entry that is None.
+        if not return_dict:
+            return tuple(v for v in [hidden_states, next_cache, all_hidden_states, all_self_attns] if v is not None)
+        return BaseModelOutputWithPast(
+            last_hidden_state=hidden_states,
+            past_key_values=next_cache,
+            hidden_states=all_hidden_states,
+            attentions=all_self_attns,
+        )
+def _move_to_device(maybe_tensor, device: torch.device):
+    if torch.is_tensor(maybe_tensor):
+        return maybe_tensor.to(device, non_blocking=device.type == "cuda")
+    elif isinstance(maybe_tensor, dict):
+        return {key: _move_to_device(value, device) for key, value in maybe_tensor.items()}
+    elif isinstance(maybe_tensor, list):
+        return [_move_to_device(x, device) for x in maybe_tensor]
+    elif isinstance(maybe_tensor, tuple):
+        return tuple([_move_to_device(x, device) for x in maybe_tensor])
+    elif isinstance(maybe_tensor, Mapping):
+        return type(maybe_tensor)({k: _move_to_device(v, device) for k, v in maybe_tensor.items()})
+    else:
+        return maybe_tensor
+def move_to_device(sample, device: torch.device):
+    if device.type == "cpu":
+        return sample
+    if len(sample) == 0:
+        return {}
+    return _move_to_device(sample, device)
+def input_transform_func(
+    tokenizer: PreTrainedTokenizerFast,
+    examples: Dict[str, List],
+    always_add_eos: bool,
+    max_length: int,
+    instruction: str,
+) -> BatchEncoding:
+    if always_add_eos:
+        examples['input_texts'] = [instruction + input_example + tokenizer.eos_token for input_example in examples['input_texts']]
+    batch_dict = tokenizer(
+        examples['input_texts'],
+        max_length=max_length,
+        padding=True,
+        return_token_type_ids=False,
+        return_tensors="pt",
+        truncation=True)
+    return batch_dict
+class PreNorm(torch.nn.Module):
+    def __init__(self, dim, fn, context_dim = None):
+        super().__init__()
+        self.fn = fn
+        self.norm = torch.nn.LayerNorm(dim)
+        self.norm_context = torch.nn.LayerNorm(context_dim) if exists(context_dim) else None
+    def forward(self, x, **kwargs):
+        x = self.norm(x)
+        if exists(self.norm_context):
+            context = kwargs['context']
+            normed_context = self.norm_context(context)
+            kwargs.update(context = normed_context)
+        return self.fn(x, **kwargs)
+class GEGLU(torch.nn.Module):
+    def forward(self, x):
+        x, gates = x.chunk(2, dim = -1)
+        return x * torch.nn.functional.gelu(gates)
+class FeedForward(torch.nn.Module):
+    def __init__(self, dim, mult = 4):
+        super().__init__()
+        self.net = torch.nn.Sequential(torch.nn.Linear(dim, dim * mult * 2),
+            GEGLU(),
+            torch.nn.Linear(dim * mult, dim))
+    def forward(self, x):
+        return self.net(x)
+def exists(val):
+    return val is not None
+def default(val, d):
+    return val if exists(val) else d
+class Attention(torch.nn.Module):
+    def __init__(self, query_dim, context_dim = None, heads = 8, dim_head = 64):
+        super().__init__()
+        inner_dim = dim_head * heads
+        context_dim = default(context_dim, query_dim)
+        self.scale = dim_head ** -0.5
+        self.heads = heads
+        self.to_q = torch.nn.Linear(query_dim, inner_dim, bias = False)
+        self.to_kv = torch.nn.Linear(context_dim, inner_dim * 2, bias = False)
+        self.to_out = torch.nn.Linear(inner_dim, query_dim, bias = False)
+    def forward(self, x, context = None, mask = None):
+        h = self.heads
+        q = self.to_q(x)
+        context = default(context, x)
+        k, v = self.to_kv(context).chunk(2, dim = -1)
+        q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> (b h) n d', h = h), (q, k, v))
+        with torch.backends.cuda.sdp_kernel(enable_flash=True, enable_mem_efficient=True):
+            out = torch.nn.functional.scaled_dot_product_attention(q, k, v)
+        out = rearrange(out, '(b h) n d -> b n (h d)', h = h)
+        return self.to_out(out)
+class LatentAttentionModel(PreTrainedModel):
+    config_class = LatentAttentionConfig
+    def __init__(self, config: LatentAttentionConfig):
+        super().__init__(config)
+        ## cross-attention block
+        num_latents, latent_dim, cross_heads, cross_dim_head = config.num_latents_value, config.latent_dim, config.num_cross_heads, config.cross_dim_head
+        dim = config.hidden_dim
+        # init latent_attention and latents
+        self.cross_attend_blocks = torch.nn.ModuleList([
+            PreNorm(latent_dim, Attention(latent_dim, dim, heads = cross_heads, dim_head = cross_dim_head),
+                    context_dim = dim),
+            PreNorm(latent_dim, FeedForward(latent_dim)),
+        ])
+        self.output_normalize = config.output_normalize
+        self.register_parameter("latents", torch.nn.Parameter(torch.randn(num_latents, latent_dim)))
+    def forward(self, hiddens, attention_mask: torch.Tensor=None):
+        ## cross-attention block
+        cross_attn, cross_ff = self.cross_attend_blocks
+        b, *_, device = *hiddens.shape, hiddens.device
+        x = repeat(self.latents, 'n d -> b n d', b = b)
+        hiddens = cross_attn(hiddens, context = x, mask = None) + hiddens
+        hiddens = cross_ff(hiddens) + hiddens
+        if attention_mask !=None:
+            s = torch.sum(hiddens * attention_mask.unsqueeze(-1).float(), dim=1)
+            d = attention_mask.sum(dim=1, keepdim=True).float()
+            hiddens = s / d
+            if self.output_normalize:
+                hiddens = torch.nn.functional.normalize(hiddens, p=2, dim=-1)
+        return hiddens
+class NVEmbedModel(PreTrainedModel):
+    config_class = NVEmbedConfig
+    _no_split_modules = ["MistralDecoderLayer", "LatentAttentionModel"]
+    def __init__(self, config: NVEmbedConfig):
+        super().__init__(config)
+        self.latent_attention_model = AutoModel.from_config(config.latent_attention_config)
+        self.embedding_model = AutoModel.from_config(
+            config.text_config,
+        ) if config.text_config is not None else None
+        self.tokenizer = AutoTokenizer.from_pretrained(config.text_config._name_or_path) if config.text_config is not None else None
+        self.padding_side = config.padding_side
+        self.is_mask_instruction = config.is_mask_instruction
+        self.add_eos = config.add_eos
+        self.mask_type = config.mask_type
+        if config.add_pad_token and self.tokenizer is not None:
+            self.add_pad_token()
+    def add_pad_token(self):
+        self.tokenizer.pad_token = self.tokenizer.eos_token
+        self.tokenizer.padding_side = self.padding_side
+    def prepare_kwargs_from_batch(self, batch_dict: dict, instruction_lens: int, device: torch.device):
+        batch_dict = move_to_device(batch_dict, device)
+        attention_mask = batch_dict['attention_mask'].clone() if 'attention_mask' in batch_dict else None
+        if (attention_mask is not None and
+            self.padding_side == "right" and
+            self.is_mask_instruction == True and
+            instruction_lens > 0):
+            # Mask out the instruction tokens for mean-pooling
+            attention_mask[:, :instruction_lens] = 0
+        features: NVEmbedFeatures = {
+            'input_ids': torch.tensor(batch_dict.get('input_ids').to(batch_dict.get('input_ids')).long()),
+            'attention_mask': batch_dict['attention_mask'],
+            'pool_mask': attention_mask,
+        }
+        return features
+    @torch.no_grad()
+    def _do_encode(self,
+        prompts: List[str],
+        batch_size: int=1,
+        instruction: str="",
+        max_length: int=4096,
+        num_workers: int=32,
+        **kwargs
+    ) -> Union[np.ndarray, torch.FloatTensor]:
+        dataset: Dataset = Dataset.from_dict({'input_texts': prompts})
+        dataset.set_transform(partial(input_transform_func,
+                                      self.tokenizer,
+                                      always_add_eos=True,
+                                      max_length=max_length,
+                                      instruction=instruction))
+        data_collator = DataCollatorWithPadding(self.tokenizer)
+        data_loader = DataLoader(
+            dataset,
+            batch_size=batch_size,
+            shuffle=False,
+            drop_last=False,
+            num_workers=num_workers,
+            collate_fn=data_collator,
+            pin_memory=True)
+        if self.padding_side == "right" and self.is_mask_instruction == True and len(instruction) > 0:
+            instruction_lens = len(self.tokenizer.tokenize(instruction))
+        else:
+            instruction_lens = 0
+        encoded_embeds = []
+        device = next(self.embedding_model.parameters()).device
+        for batch_dict in tqdm(data_loader, desc='encoding', mininterval=10):
+            features = self.prepare_kwargs_from_batch(batch_dict, instruction_lens, device=device)
+            embeds=self(**features)["sentence_embeddings"].squeeze(1)
+            encoded_embeds.append(embeds)
+        encoded_embeds = torch.cat(encoded_embeds, axis=0)
+        if "return_numpy" in kwargs and  kwargs.get("return_numpy"):
+            encoded_embeds = encoded_embeds.cpu().detach().numpy()
+        return encoded_embeds
+    def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, pool_mask: Optional[torch.Tensor]=None, return_dict: bool=True):
+        autocast_ctx = torch.autocast if torch.cuda.is_available() else nullcontext
+        with autocast_ctx("cuda"):
+            ## decoder only layer
+            outputs = self.embedding_model(
+                input_ids=input_ids,
+                attention_mask=attention_mask,
+            )
+            ## latent attention layer
+            embeds = self.latent_attention_model(
+                outputs.last_hidden_state,
+                pool_mask,
+            )
+        if not return_dict:
+            return (embeds,)
+        return {"sentence_embeddings": embeds}
+    @torch.no_grad()
+    def encode(self, prompts: List[str], instruction: str="", max_length: int=4096, **kwargs):
+        if self.padding_side == "right" and self.is_mask_instruction == True and len(instruction) > 0:
+            instruction_lens = len(self.tokenizer.tokenize(instruction))
+        else:
+            instruction_lens = 0
+        device = next(self.embedding_model.parameters()).device
+        batch_dict = input_transform_func(self.tokenizer,
+                                          {"input_texts": [prompt for prompt in prompts]},
+                                          always_add_eos=True,
+                                          max_length=max_length,
+                                          instruction=instruction)
+        features: NVEmbedFeatures = self.prepare_kwargs_from_batch(batch_dict, instruction_lens, device=device)
+        return self(**features)["sentence_embeddings"].squeeze(1)
+## AutoModel Register
+# Associate each config class with its model class; NVEmbedModel.__init__
+# relies on AutoModel.from_config to build its sub-models from their configs.
+AutoModel.register(NVEmbedConfig, NVEmbedModel)
+AutoModel.register(LatentAttentionConfig, LatentAttentionModel)
+AutoModel.register(BidirectionalMistralConfig, BidirectionalMistralModel)
+## Register for auto class
+# NOTE(review): presumably this marks the classes as custom "AutoModel" code so
+# they are exported alongside saved checkpoints — confirm against the transformers docs.
+NVEmbedModel.register_for_auto_class("AutoModel")
+LatentAttentionModel.register_for_auto_class("AutoModel")
+BidirectionalMistralModel.register_for_auto_class("AutoModel")

modules.json ADDED Viewed

	@@ -0,0 +1,20 @@

+[
+  {
+    "idx": 0,
+    "name": "0",
+    "path": "",
+    "type": "sentence_transformers.models.Transformer"
+  },
+  {
+    "idx": 1,
+    "name": "1",
+    "path": "1_Pooling",
+    "type": "sentence_transformers.models.Pooling"
+  },
+  {
+    "idx": 2,
+    "name": "2",
+    "path": "2_Normalize",
+    "type": "sentence_transformers.models.Normalize"
+  }
+]

sentence_bert_config.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "max_seq_length": 1024,
+  "do_lower_case": false
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+size 493443

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,50 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": null,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [],
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "legacy": true,
+  "max_length": 1024,
+  "model_max_length": 1024,
+  "pad_to_multiple_of": null,
+  "pad_token": "</s>",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "stride": 0,
+  "tokenizer_class": "LlamaTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}