Commit 4c108e3

Duplicate from sarvamai/sarvam-105b

Co-authored-by: Rahul <rahular@users.noreply.huggingface.co>

This view is limited to 50 files because it contains too many changes.
- .gitattributes +36 -0
- README.md +298 -0
- chat_template.jinja +97 -0
- config.json +56 -0
- configuration_sarvam_moe.py +140 -0
- generation_config.json +6 -0
- hotpatch_vllm.py +114 -0
- model-00001-of-00085.safetensors +3 -0
- model-00002-of-00085.safetensors +3 -0
- model-00003-of-00085.safetensors +3 -0
- model-00004-of-00085.safetensors +3 -0
- model-00005-of-00085.safetensors +3 -0
- model-00006-of-00085.safetensors +3 -0
- model-00007-of-00085.safetensors +3 -0
- model-00008-of-00085.safetensors +3 -0
- model-00009-of-00085.safetensors +3 -0
- model-00010-of-00085.safetensors +3 -0
- model-00011-of-00085.safetensors +3 -0
- model-00012-of-00085.safetensors +3 -0
- model-00013-of-00085.safetensors +3 -0
- model-00014-of-00085.safetensors +3 -0
- model-00015-of-00085.safetensors +3 -0
- model-00016-of-00085.safetensors +3 -0
- model-00017-of-00085.safetensors +3 -0
- model-00018-of-00085.safetensors +3 -0
- model-00019-of-00085.safetensors +3 -0
- model-00020-of-00085.safetensors +3 -0
- model-00021-of-00085.safetensors +3 -0
- model-00022-of-00085.safetensors +3 -0
- model-00023-of-00085.safetensors +3 -0
- model-00024-of-00085.safetensors +3 -0
- model-00025-of-00085.safetensors +3 -0
- model-00026-of-00085.safetensors +3 -0
- model-00027-of-00085.safetensors +3 -0
- model-00028-of-00085.safetensors +3 -0
- model-00029-of-00085.safetensors +3 -0
- model-00030-of-00085.safetensors +3 -0
- model-00031-of-00085.safetensors +3 -0
- model-00032-of-00085.safetensors +3 -0
- model-00033-of-00085.safetensors +3 -0
- model-00034-of-00085.safetensors +3 -0
- model-00035-of-00085.safetensors +3 -0
- model-00036-of-00085.safetensors +3 -0
- model-00037-of-00085.safetensors +3 -0
- model-00038-of-00085.safetensors +3 -0
- model-00039-of-00085.safetensors +3 -0
- model-00040-of-00085.safetensors +3 -0
- model-00041-of-00085.safetensors +3 -0
- model-00042-of-00085.safetensors +3 -0
- model-00043-of-00085.safetensors +3 -0
.gitattributes
ADDED
@@ -0,0 +1,36 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md
ADDED
@@ -0,0 +1,298 @@
---
language:
- en
- hi
- bn
- ta
- te
- mr
- gu
- kn
- ml
- pa
- or
- as
- ur
- sa
- ne
- sd
- kok
- mai
- doi
- mni
- sat
- ks
- bo
library_name: transformers
license: apache-2.0
pipeline_tag: text-generation
---



Want a smaller model? Download [Sarvam-30B](https://huggingface.co/sarvamai/sarvam-30b/)!

## Index

1. [Introduction](#introduction)
2. [Architecture](#architecture)
3. [Benchmarks](#benchmarks)
   - Knowledge & Coding
   - Reasoning & Math
   - Agentic
4. [Inference](#inference)
   - Hugging Face
   - [vLLM](https://github.com/vllm-project/vllm)
   - [SGLang](https://github.com/sgl-project/sglang)
5. [Footnote](#footnote)
6. [Citation](#citation)

## Introduction

**Sarvam-105B** is an advanced Mixture-of-Experts (MoE) model with 10.3B active parameters, designed for superior performance across a wide range of complex tasks. It is highly optimized for complex reasoning, with particular strength in agentic tasks, mathematics, and coding.

Sarvam-105B is a top-tier performer, consistently matching or surpassing several major closed-source models and staying within a narrow margin of frontier models across diverse reasoning and agentic benchmarks. It demonstrates exceptional agentic and reasoning capabilities in real-world applications such as web search and technical troubleshooting.

A major focus during training was the Indian context and languages, resulting in **state-of-the-art performance across 22 Indian languages** for its model size.

Sarvam-105B is open-sourced under the **Apache License**. For more details, see our [blog](https://www.sarvam.ai/blogs/sarvam-30b-105b).

## Architecture

The 105B model adopts an MLA-style attention stack with decoupled QK head dimensions: `q_head_dim=192`, split into RoPE and NoPE components, with `v_head_dim=128` and an MLA cache head size (`head_dim`) of 576, enabling higher representational bandwidth per head while keeping the hidden size at 4096. This improves attention expressivity and long-context extrapolation (via YaRN scaling with a factor of 40 and a 128K context window). A dense `intermediate_size` of 16384 and a `moe_intermediate_size` of 2048, combined with top-8 routing over 128 experts, increase per-token active capacity while keeping activation cost manageable. The model has one shared expert, a routed scaling factor of 2.5, and auxiliary-loss-free router balancing.
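
The arithmetic behind these numbers can be checked directly against `config.json` in this repository (a rough sketch; the per-token FFN width is an informal back-of-the-envelope figure, not an official parameter breakdown):

```python
# Rough sketch: derive the attention head dimensions and per-token MoE FFN width
# from the values published in config.json (informal arithmetic only).
qk_nope_head_dim = 128
qk_rope_head_dim = 64
kv_lora_rank = 512
v_head_dim = 128

q_head_dim = qk_nope_head_dim + qk_rope_head_dim      # 192: decoupled query head dim
mla_cache_head_dim = kv_lora_rank + qk_rope_head_dim  # 576: the "head_dim" MLA kernels see

moe_intermediate_size = 2048
num_experts_per_tok = 8
num_shared_experts = 1

# FFN width activated per token: routed experts plus the shared expert,
# compared with the dense intermediate_size used by the dense layer(s).
active_ffn_width = (num_experts_per_tok + num_shared_experts) * moe_intermediate_size  # 18432
dense_intermediate_size = 16384

print(q_head_dim, mla_cache_head_dim, active_ffn_width, dense_intermediate_size)
```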

## Benchmarks

<details>
<summary>Knowledge & Coding</summary>

| Benchmark | Sarvam-105B | GLM-4.5-Air | GPT-OSS-120B | Qwen3-Next-80B-A3B-Thinking |
|---|---|---|---|---|
| Math500 | 98.6 | 97.2 | 97.0 | 98.2 |
| Live Code Bench v6 | 71.7 | 59.5 | 72.3 | 68.7 |
| MMLU | 90.6 | 87.3 | 90.0 | 90.0 |
| MMLU Pro | 81.7 | 81.4 | 80.8 | 82.7 |
| Writing Bench | 80.5 | 83.8 | 86.5 | 84.6 |
| Arena Hard v2 | 71.0 | 68.1 | 88.5 | 68.2 |
| IF Eval | 84.8 | 83.5 | 85.4 | 88.9 |

</details>

<details>
<summary>Reasoning & Math</summary>

| Benchmark | Sarvam-105B | GLM-4.5-Air | GPT-OSS-120B | Qwen3-Next-80B-A3B-Thinking |
|---|---|---|---|---|
| GPQA Diamond | 78.7 | 75.0 | 80.1 | 77.2 |
| AIME 25 (w/ Tools) | 88.3 (96.7) | 83.3 | 90.0 | 87.8 |
| Beyond AIME | 69.1 | 61.5 | 51.0 | 68.0 |
| HMMT (Feb 25) | 85.8 | 69.2 | 90.0 | 73.9 |
| HMMT (Nov 25) | 85.8 | 75.0 | 90.0 | 80.0 |

</details>

<details>
<summary>Agentic</summary>

| Benchmark | Sarvam-105B | GLM-4.5-Air | GPT-OSS-120B | Qwen3-Next-80B-A3B-Thinking |
|---|---|---|---|---|
| BrowseComp | 49.5 | 21.3 | - | 38.0 |
| SWE Bench Verified (SWE-Agent Harness) | 45.0 | 57.6 | 50.6 | 60.9 |
| τ² Bench (avg.) | 68.3 | 53.2 | 65.8 | 55.0 |

> See the footnote for evaluation details.

</details>

## Inference

<details>
<summary>Hugging Face</summary>

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

model_name = "sarvamai/sarvam-105b"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, device_map="auto")

def generate_text(
    prompt: str,
    max_new_tokens: int = 2048,
    temperature: float = 0.8,
    top_p: float = 0.95,
    repetition_penalty: float = 1.0,
) -> str:
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    generation_config = GenerationConfig(
        max_new_tokens=max_new_tokens,
        repetition_penalty=repetition_penalty,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
    )

    with torch.no_grad():
        output_ids = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            generation_config=generation_config,
        )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

prompts = [
    "Which country won the FIFA World Cup in 2012?",
]

for prompt in prompts:
    templated_prompt = tokenizer.apply_chat_template(
        [{"role": "user", "content": prompt}],
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=True,
    )
    output = generate_text(templated_prompt, max_new_tokens=512)
    print("Prompt: ", prompt)
    print("Generated text: ", output)
    print("=" * 100)
```
</details>

<details>
<summary>SGLang</summary>

**Install the latest SGLang from source**

```bash
git clone https://github.com/sgl-project/sglang.git
cd sglang
pip install -e "python[all]"
```

**Instantiate the model and run**

```python
import sglang as sgl
from transformers import AutoTokenizer

model_path = "sarvamai/sarvam-105b"
tokenizer = AutoTokenizer.from_pretrained(model_path)
engine = sgl.Engine(
    model_path=model_path,
    tp_size=4,
    mem_fraction_static=0.70,
    trust_remote_code=True,
    dtype="bfloat16",
    moe_runner_backend="flashinfer_cutedsl",
    prefill_attention_backend="fa3",
    decode_attention_backend="flashmla",
    disable_radix_cache=False,
)

sampling_params = {
    "temperature": 0.8,
    "max_new_tokens": 2048,
    "repetition_penalty": 1.0,
}

prompts = [
    "Which band released the album Dark Side of the Moon in 1973?",
]

outputs = engine.generate(
    [
        tokenizer.apply_chat_template(
            [{"role": "user", "content": prompt}],
            tokenize=False,
            add_generation_prompt=True,
            enable_thinking=True,
        )
        for prompt in prompts
    ],
    sampling_params,
)
for p, o in zip(prompts, outputs):
    print("Prompt: ", p)
    print("Generated text: ", o['text'])
    print("=" * 100)
```
</details>

<details>
<summary>vLLM</summary>

Note: a PR adding native support for the Sarvam models to vLLM is currently open ([link](https://github.com/vllm-project/vllm/pull/33942)). Until it is merged, there are two options.

#### Option 1: install from source (hard)

* Use the custom fork here: [link](https://github.com/rahul-sarvam/vllm)
* Follow the instructions here to install from source: [link](https://docs.vllm.ai/en/latest/getting_started/installation/gpu/index.html#build-wheel-from-source)

#### Option 2: hot-patch (easy)

* Run [hotpatch_vllm.py](./hotpatch_vllm.py) (see the sketch after this list for a programmatic invocation)
* This will do the following:
  * install `vllm==0.15.0`
  * add two Sarvam model entries to vLLM's `registry.py`
  * download `sarvam.py`, the model executor used by both `sarvam-105b` and `sarvam-30b`
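
The patch can also be applied from inside Python (a minimal sketch, assuming `hotpatch_vllm.py` from this repo sits in the current working directory; it is equivalent to running `python hotpatch_vllm.py` from a shell):

```python
# Minimal sketch: apply the vLLM hot-patch programmatically.
# Assumes hotpatch_vllm.py (from this repo) has been downloaded to the current directory.
import subprocess
import sys

subprocess.check_call([sys.executable, "hotpatch_vllm.py"])
```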

Once this is done, you can run vLLM as usual:

```python
from vllm import LLM, SamplingParams
from transformers import AutoTokenizer

model_path = "sarvamai/sarvam-105b"
tokenizer = AutoTokenizer.from_pretrained(model_path)
llm = LLM(
    model=model_path,
    trust_remote_code=True,
    max_model_len=2048,
    tensor_parallel_size=8,
    max_num_seqs=16,
)
sampling_params = SamplingParams(
    temperature=0.8,
    max_tokens=2048,
    repetition_penalty=1.0,
    spaces_between_special_tokens=True,
)

prompts = [
    "Which artist painted The Persistence of Memory (the melting clocks)?",
]

outputs = llm.generate(
    [
        tokenizer.apply_chat_template(
            [{"role": "user", "content": prompt}],
            tokenize=False,
            add_generation_prompt=True,
            enable_thinking=True,
        )
        for prompt in prompts
    ],
    sampling_params,
)
for p, o in zip(prompts, outputs):
    print("Prompt: ", p)
    print("Generated text: ", o.outputs[0].text)
    print("=" * 100)
```
</details>

## Footnote

* **General settings**: All benchmarks are evaluated with a maximum context length of 65,536 tokens.
* **Reasoning & Math benchmarks** (Math500, MMLU, MMLU Pro, GPQA Diamond, AIME 25, Beyond AIME, HMMT): evaluated with `temperature=1.0, top_p=1.0, max_new_tokens=65536`.
* **Coding & Knowledge benchmarks** (Live Code Bench v6, Arena Hard v2, IF Eval): evaluated with `temperature=1.0, top_p=1.0, max_new_tokens=65536`.
* **Writing Bench**: responses generated using the official Writing-Bench parameters `temperature=0.7, top_p=0.8, top_k=20, max_length=16000`; scoring performed with the official Writing-Bench critic model using `temperature=1.0, top_p=0.95, max_length=2048`.
* **Agentic benchmarks** (BrowseComp, SWE Bench Verified, τ² Bench): evaluated with `temperature=0.5, top_p=1.0, max_new_tokens=32768`.

## Citation

```
@misc{sarvam_sovereign_models,
  title        = {Introducing Sarvam's Sovereign Models},
  author       = {{Sarvam Foundation Models Team}},
  year         = {2026},
  howpublished = {\url{https://www.sarvam.ai/blogs/sarvam-30b-105b}},
  note         = {Accessed: 2026-03-03}
}
```
chat_template.jinja
ADDED
@@ -0,0 +1,97 @@
{{- '[@BOS@]\n' }}
{%- if tools -%}
<|start_of_turn|><|tool_declare|>
<tools>
{% for tool in tools %}
{{ tool | tojson(ensure_ascii=False) }}
{% endfor %}
</tools>
{{- '<|end_of_turn|>\n' }}{%- endif -%}
{%- macro visible_text(content) -%}
{%- if content is string -%}
{{- content }}
{%- elif content is iterable and content is not mapping -%}
{%- for item in content -%}
{%- if item is mapping and item.type == 'text' -%}
{{- item.text }}
{%- elif item is string -%}
{{- item }}
{%- endif -%}
{%- endfor -%}
{%- elif content is none -%}
{{- '' }}
{%- else -%}
{{- content }}
{%- endif -%}
{%- endmacro -%}
{%- set ns = namespace(last_user_index=-1) %}
{%- for m in messages %}
{%- if m.role == 'user' %}
{% set ns.last_user_index = loop.index0 -%}
{%- endif %}
{%- endfor %}
{% for m in messages %}
{%- if m.role == 'user' -%}<|start_of_turn|><|user|>
{{ visible_text(m.content) }}
{{- '<|nothink|>' if (enable_thinking is defined and not enable_thinking and not visible_text(m.content).endswith("<|nothink|>")) else '' -}}
{{- '<|end_of_turn|>\n' }}
{%- elif m.role == 'assistant' -%}
{{- '<|start_of_turn|><|assistant|>\n' }}
{%- set reasoning_content = '' %}
{%- set content = visible_text(m.content) %}
{%- if m.reasoning_content is string %}
{%- set reasoning_content = m.reasoning_content %}
{%- else %}
{%- if '</think>' in content %}
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
{%- endif %}
{%- endif %}
{%- if loop.index0 > ns.last_user_index and reasoning_content -%}
{{ '<think>' + reasoning_content.strip() + '</think>'}}
{%- else -%}
{{ '<think></think>' }}
{%- endif -%}
{%- if content.strip() -%}
{{ '\n' + content.strip() }}
{%- endif -%}
{% if m.tool_calls %}
{% for tc in m.tool_calls %}
{%- if tc.function %}
{%- set tc = tc.function %}
{%- endif %}
{{ '\n<tool_call>' + tc.name }}
{% set _args = tc.arguments %}
{% for k, v in _args.items() %}
<arg_key>{{ k }}</arg_key>
<arg_value>{{ v | tojson(ensure_ascii=False) if v is not string else v }}</arg_value>
{% endfor %}
</tool_call>{% endfor %}
{% endif %}
{{- '<|end_of_turn|>\n' }}
{%- elif m.role == 'tool' -%}
{%- if m.content is string -%}
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
{{- '<|start_of_turn|><|observation|>' }}
{%- endif %}
{{- '\n<tool_response>\n' }}
{{- m.content }}
{{- '\n</tool_response>' }}
{%- else -%}
<|start_of_turn|><|observation|>{% for tr in m.content %}

<tool_response>
{{ tr.output if tr.output is defined else tr }}
</tool_response>{% endfor -%}
{% endif -%}
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
{{- '<|end_of_turn|>\n' }}{%- endif -%}
{%- elif m.role == 'system' -%}
<|start_of_turn|><|system|>
{{ visible_text(m.content) }}
{{- '<|end_of_turn|>\n' }}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{- '<|start_of_turn|><|assistant|>\n' }}
{%- endif -%}
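
As a quick illustration of how this template behaves (a minimal sketch, assuming the tokenizer in this repo picks up `chat_template.jinja` and that your `transformers` version forwards extra keyword arguments such as `enable_thinking` to the template, as in the README examples above): with `enable_thinking=False` the template appends `<|nothink|>` to the last user turn, signalling the model to skip its `<think>...</think>` block.

```python
# Minimal sketch: render the chat template with and without thinking enabled.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("sarvamai/sarvam-105b")
messages = [{"role": "user", "content": "Name three rivers in India."}]

thinking_prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True, enable_thinking=True
)
no_thinking_prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True, enable_thinking=False
)

print(thinking_prompt)     # user turn closed by <|end_of_turn|>, assistant turn opened
print(no_thinking_prompt)  # same, but with <|nothink|> appended to the user turn
```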
config.json
ADDED
@@ -0,0 +1,56 @@
{
  "architectures": [
    "SarvamMLAForCausalLM"
  ],
  "attention_dropout": 0.0,
  "attn_implementation": null,
  "auto_map": {
    "AutoConfig": "configuration_sarvam_moe.SarvamMLAConfig",
    "AutoModel": "modeling_sarvam_moe.SarvamMLAModel",
    "AutoModelForCausalLM": "modeling_sarvam_moe.SarvamMLAForCausalLM"
  },
  "default_theta": 10000.0,
  "dtype": "float32",
  "embedding_dropout": 0.0,
  "eos_token_id": 1,
  "first_k_dense_replace": 1,
  "head_dim": 576,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.006,
  "intermediate_size": 16384,
  "kv_lora_rank": 512,
  "max_position_embeddings": 131072,
  "model_type": "sarvam_mla",
  "moe_intermediate_size": 2048,
  "moe_router_enable_expert_bias": true,
  "num_attention_heads": 64,
  "num_experts": 128,
  "num_experts_per_tok": 8,
  "num_hidden_layers": 32,
  "num_shared_experts": 1,
  "output_dropout": 0.0,
  "output_router_logits": false,
  "pad_token_id": 0,
  "q_head_dim": 192,
  "qk_nope_head_dim": 128,
  "qk_rope_head_dim": 64,
  "rms_norm_eps": 1e-06,
  "rope_scaling": {
    "beta_fast": 32,
    "beta_slow": 1,
    "factor": 40,
    "mscale": 1.0,
    "mscale_all_dim": 1.0,
    "original_max_position_embeddings": 4096,
    "type": "deepseek_yarn"
  },
  "rope_theta": 10000.0,
  "routed_scaling_factor": 2.5,
  "tie_word_embeddings": false,
  "transformers_version": "4.57.2",
  "use_cache": true,
  "use_qk_norm": true,
  "v_head_dim": 128,
  "vocab_size": 262144
}
configuration_sarvam_moe.py
ADDED
@@ -0,0 +1,140 @@
from transformers.configuration_utils import PretrainedConfig


class SarvamMLAConfig(PretrainedConfig):
    model_type = "sarvam_mla"

    base_model_pp_plan = {
        "embed_tokens": (["input_ids"], ["inputs_embeds"]),
        "layers": (["hidden_states", "attention_mask"], ["hidden_states"]),
        "norm": (["hidden_states"], ["hidden_states"]),
    }

    base_model_tp_plan = {
        "layers.*.self_attn.q_proj": "colwise",
        "layers.*.self_attn.kv_b_proj": "colwise",
        "layers.*.self_attn.o_proj": "rowwise",
    }

    def __init__(
        self,
        vocab_size: int = 262144,
        hidden_size: int = 4096,
        num_hidden_layers: int = 32,
        intermediate_size: int = 16384,
        moe_intermediate_size: int = 2048,
        num_experts: int = 128,
        num_experts_per_tok: int = 8,
        num_shared_experts: int = 1,
        first_k_dense_replace: int = 1,
        num_attention_heads: int = 64,
        qk_rope_head_dim: int = 64,
        qk_nope_head_dim: int = 128,
        kv_lora_rank: int = 512,
        v_head_dim: int = 128,
        max_position_embeddings: int = 4096,
        rope_theta: float = 10000.0,
        rope_scaling: dict = None,
        attention_dropout: float = 0.0,
        output_dropout: float = 0.0,
        rms_norm_eps: float = 1e-6,
        hidden_act: str = "silu",
        use_cache: bool = True,
        use_qk_norm: bool = True,
        moe_router_enable_expert_bias: bool = True,
        routed_scaling_factor: float = 2.5,
        output_router_logits: bool = False,
        tie_word_embeddings: bool = False,
        pad_token_id: int = 0,
        eos_token_id: int = 1,
        embedding_dropout: float = 0.0,
        initializer_range: float = 0.006,
        attn_implementation: str = "eager",
        **kwargs,
    ):
        # core geometry
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.intermediate_size = intermediate_size
        self.num_attention_heads = num_attention_heads
        self.max_position_embeddings = max_position_embeddings

        # MLA geometry
        self.qk_rope_head_dim = qk_rope_head_dim
        self.qk_nope_head_dim = qk_nope_head_dim
        self.kv_lora_rank = kv_lora_rank
        self.v_head_dim = v_head_dim
        # convenient derived dim
        self.q_head_dim = qk_rope_head_dim + qk_nope_head_dim
        # vLLM MLA expects "head size" = Lkv + R, not hidden_size/num_heads.
        self.head_dim = int(self.kv_lora_rank + self.qk_rope_head_dim)

        # MoE
        self.moe_intermediate_size = moe_intermediate_size
        self.num_experts = num_experts
        self.num_experts_per_tok = num_experts_per_tok
        self.num_shared_experts = num_shared_experts
        self.first_k_dense_replace = first_k_dense_replace

        # Router
        self.moe_router_enable_expert_bias = moe_router_enable_expert_bias
        self.routed_scaling_factor = routed_scaling_factor
        self.output_router_logits = output_router_logits

        # dropouts / norms / init
        self.attention_dropout = attention_dropout
        self.output_dropout = output_dropout
        self.embedding_dropout = embedding_dropout
        self.rms_norm_eps = rms_norm_eps
        self.initializer_range = initializer_range
        self.hidden_act = hidden_act

        # rope / cache
        self.rope_theta = rope_theta
        self.use_cache = use_cache
        self.use_qk_norm = use_qk_norm
        self.rope_scaling = rope_scaling
        self.default_theta = 10000.0

        if self.rope_scaling is None:
            self.rope_scaling = {
                'beta_fast': 32,
                'beta_slow': 1,
                'factor': 40,
                'mscale': 1.0,
                'mscale_all_dim': 1.0,
                'original_max_position_embeddings': 4096,
                'rope_type': 'deepseek_yarn',
            }

        self.attn_implementation = attn_implementation
        self._attn_implementation = attn_implementation

        if "_attn_implementation" in kwargs:
            self._attn_implementation = kwargs.pop("_attn_implementation")
            if hasattr(self, "attn_implementation"):
                self.attn_implementation = self._attn_implementation

        super().__init__(
            pad_token_id=pad_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )

    def convert_rope_params_to_dict(self, ignore_keys_at_rope_validation: set | None = None, **kwargs):
        rope_scaling = kwargs.pop("rope_scaling", None)
        self.rope_parameters = rope_scaling or self.rope_parameters
        self.rope_parameters = self.rope_parameters if self.rope_parameters is not None else {}

        # Standardize and validate the correctness of rotary position embeddings parameters
        self.rope_parameters.setdefault("rope_theta", kwargs.pop("rope_theta", self.default_theta))
        self.standardize_rope_params()
        self.validate_rope(ignore_keys=ignore_keys_at_rope_validation)

        # Convert to float because RoPE fns expect a float. Models on the hub were saved as int
        for key in ["beta_fast", "beta_slow", "factor"]:
            if key in self.rope_parameters:
                self.rope_parameters[key] = float(self.rope_parameters[key])
        return kwargs
generation_config.json
ADDED
@@ -0,0 +1,6 @@
{
  "_from_model_config": true,
  "eos_token_id": 26,
  "pad_token_id": 0,
  "transformers_version": "4.57.2"
}
hotpatch_vllm.py
ADDED
@@ -0,0 +1,114 @@
#!/usr/bin/env python3
from __future__ import annotations

import sys
import subprocess
from pathlib import Path
from urllib.request import urlopen, Request


HF_BLOB_URL = "https://huggingface.co/sarvamai/sarvam-105b/blob/main/sarvam.py"

NEW_LINES = [
    '    "SarvamMoEForCausalLM": ("sarvam", "SarvamMoEForCausalLM"),\n',
    '    "SarvamMLAForCausalLM": ("sarvam", "SarvamMLAForCausalLM"),\n',
]


def run(cmd: list[str]) -> None:
    print(f"+ {' '.join(cmd)}")
    subprocess.check_call(cmd)


def pip_install_vllm() -> None:
    run([sys.executable, "-m", "pip", "install", "vllm==0.15.0"])


def find_vllm_dir() -> Path:
    import vllm  # type: ignore

    vllm_dir = Path(vllm.__file__).resolve().parent
    print(f"Detected vLLM package dir: {vllm_dir}")
    return vllm_dir


def patch_text_generation_models(registry_path: Path) -> None:
    if not registry_path.exists():
        raise FileNotFoundError(f"registry.py not found at: {registry_path}")

    text = registry_path.read_text(encoding="utf-8")
    lines = text.splitlines(keepends=True)

    # Idempotency: if both keys already present, do nothing
    if (
        any('"SarvamMoEForCausalLM"' in l for l in lines)
        and any('"SarvamMLAForCausalLM"' in l for l in lines)
    ):
        print("registry.py already contains Sarvam entries. Skipping patch.")
        return

    # Find the start of the _TEXT_GENERATION_MODELS dict
    start_idx = None
    for i, line in enumerate(lines):
        if line.strip() == "_TEXT_GENERATION_MODELS = {":
            start_idx = i
            break

    if start_idx is None:
        raise RuntimeError(
            "Could not find '_TEXT_GENERATION_MODELS = {' in registry.py. "
            "vLLM version/layout may differ."
        )

    # Find the matching closing brace for that dict using brace depth
    depth = 0
    end_idx = None
    for j in range(start_idx, len(lines)):
        depth += lines[j].count("{")
        depth -= lines[j].count("}")
        if j > start_idx and depth == 0:
            end_idx = j
            break

    if end_idx is None:
        raise RuntimeError("Failed to find end of _TEXT_GENERATION_MODELS dict (brace matching).")

    # Insert new entries just before the closing brace line
    insert_at = end_idx
    lines[insert_at:insert_at] = NEW_LINES

    registry_path.write_text("".join(lines), encoding="utf-8")
    print(f"Patched _TEXT_GENERATION_MODELS in: {registry_path}")


def download_sarvam_py(dst: Path) -> None:
    # Use /raw/ to download file contents, not HTML
    raw_url = HF_BLOB_URL.replace("/blob/", "/raw/")
    print(f"Downloading sarvam.py from: {raw_url}")

    req = Request(raw_url, headers={"User-Agent": "vllm-hotpatch-script"})
    with urlopen(req) as resp:
        data = resp.read()

    dst.parent.mkdir(parents=True, exist_ok=True)
    dst.write_bytes(data)
    print(f"Wrote: {dst}")


def main() -> None:
    pip_install_vllm()

    vllm_dir = find_vllm_dir()
    registry_path = vllm_dir / "model_executor" / "models" / "registry.py"
    sarvam_path = vllm_dir / "model_executor" / "models" / "sarvam.py"

    patch_text_generation_models(registry_path)
    download_sarvam_py(sarvam_path)

    print("\nDone.")
    print(f"- Registry patched: {registry_path}")
    print(f"- Sarvam module installed: {sarvam_path}")


if __name__ == "__main__":
    main()
model-00001-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:020162a3f1413743aa1ed567a2f19061fe63d782529059649b76a050365e547f
size 4941941584

model-00002-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c13f5b15068b2dc76a517f1912e87448493a1e0e7955855e737bfe6931c4e7c7
size 4975543872

model-00003-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:796c018f7d7af6684b5e1e4fc437abc5db6d8208af06c44a01309edc2370880e
size 4999628720

model-00004-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:edae50ee1dfe5c03943d11ecd8285533da729d691d488b7ac47e37cd9ba85e12
size 4977643584

model-00005-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:aeb778a4730409646e42dc066698b85dc64a13157b40cce4ba0de2c1ba2c1670
size 4999628712

model-00006-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fe67565f0b4dd30f46b50ec58b24e16f235e3ba79b3ef2a7a29b1dc75818196e
size 4999628736

model-00007-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:33ee20d2b2a99158fd90b4e156502b79d523bfcf98a2e06c5902b999472a5e90
size 4977643560

model-00008-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0346d9515c3327e35735b685a32177bb0607ff294c3bb080c5943580c663ae11
size 4999628720

model-00009-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3790af0b4aea97c4f443d378bff9ce8b7d79c58bbd96771f8fc98d709a28a21c
size 4999628784

model-00010-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6d3e9e1f284dc332bfda0d709fb8fdd93447043ab3b57e3b798bade2775cfb2a
size 4977643504

model-00011-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f03a7da973536f075dfcbcc5afc51d49d393cad1538ade0c2422942b008efae1
size 4999628720

model-00012-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2b2d6c8e512c5b81d7fd4d6ad18ea4aa2656bd2aae4ea5b4bf16293b8b432122
size 4977643592

model-00013-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:90e89d4af307285773680b4d663fca8b53f6d066ef27c99694331e6d7024c8bb
size 4999628704

model-00014-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c838e1af34b6e2c609521fe15e2cefd20ac9b8c161865e968bdc103f770caca8
size 4999628728

model-00015-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6d19fa7b06f3b221983a671aa09241ac4abddac1bd0c3f9d236fcfa7a496de4b
size 4977643560

model-00016-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1c826916a5dc292db7d59fe56a700c02f1bf61e8d7bb473853c6ca69a4fec1a8
size 4999628720

model-00017-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:54a69b6eec29735a09cf9c790cae51ba4370d4aa1870ef2feca363dbe4eb0181
size 4999628776

model-00018-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f5a2f5481e864e89772c9cb5349ad6c4768f9ea3ba94f7d136f53ebf63c27804
size 4977643512

model-00019-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d2c93059fcded634f04218a908a17bad1afb424289fb1f97e5e6dfa93ef3f120
size 4999628720

model-00020-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:603e97a59bb27ec736b5d896a127e45671c8b61c8b86a39e1f54db7e4d888ea9
size 4977643600

model-00021-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f4b207ef1ebcdccae3bbf28e9867f309edb2ce0c3800b280e885e8e266fcd3d4
size 4999628696

model-00022-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7d0c3f20a3c72c0d908677c7ff737393c690f3d93b1f543fff21bbcc08bbab01
size 4999628728

model-00023-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d5e851f9a4e74ec7ce50d4a9752c84d66501c5adf6ad87aeb972f1d64f0d2f51
size 4977643568

model-00024-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:dd69c64cf1ab98085376201fd63b3c66c45326b16ce45b2f66914f156622c4a0
size 4999628720

model-00025-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:10cafb2c42cfc260913bde7aaed3375274dd5bdb8a3cf68962468905d45915a6
size 4999628776

model-00026-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5487fcda6b3f8468d0f3c5e8898fad88b6bd67cb364afc6e683b757adec9616b
size 4977643640

model-00027-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cf70c35946bad6e6eb0d2a393dba31a7dcea835eb0873b4c905f486810105af7
size 4999628864

model-00028-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:05160d3e48222b667982d4033b29a0b5e0b1c6957276a644a7a82bcc52febcf2
size 4977643744

model-00029-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2dc78fb681b662ee382cfc2390cc1f483e28ef69e07b623adccfb056165ac93b
size 4999628840

model-00030-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e2cad103eec94b218d5b6590373dc1a59217c77458d92c6152b76241981f008a
size 4999628872

model-00031-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:30eea0422e12488ceae5fad99580984723939f2ee85ccbc25b6ac89f270dc51d
size 4977643720

model-00032-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:84850fa473bb9a91f6611838433bb24535ae001c1b77a3d291804eacc4acb639
size 4999628864

model-00033-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9a630b2cd2fabdff9ceee3a6cfce65c5f93d8c99de4b12eccc968c37b6649791
size 4999628920

model-00034-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bff2b3792df9fa7999d9c53bd6d4f3591e0dc3ea0a81d22a8bac39eaf6b667bf
size 4977643672

model-00035-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f329c1cb6b407c4b764b5d5913f89918b55e700f9e790bc01681b052802e75f6
size 4999628864

model-00036-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f395cf50d44e672c07b0059a5775636ac2caf849132dbb16befc0b48bfeaf747
size 4977643752

model-00037-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2ec1e4fcfacbc683eaf6db0398212496e23644bcea25f4a436d2d61ff1e3afdc
size 4999628832

model-00038-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f7a3d772244e2d55ec9847767f495645d5116d4df24820c530a47d1fc8259ebd
size 4999628864

model-00039-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:55f257b34fb41b225672561bae1a41c9c7a3c00cff1619748b8fda8c4d41a27c
size 4977643720

model-00040-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f2722c9e30f9f87b364629700ec6ff31bf50284ce3abe58f02eb298612ec5907
size 4999628864

model-00041-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5d343984921f3e7e3839b48ae2f109733707f7accf6851c5e0ba6bd72884a91a
size 4999628912

model-00042-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7ba68f16a0ce2a70d59f9ceb0220aed17a8674dc01c7ffb5a274f741b08d7294
size 4977643680

model-00043-of-00085.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2d2955a473ab2c6c3876bde5eabf3aa92192588ad7cca7eedc5adaad7715050d
size 4999628864