Upload HyperLLM v0.4 - DoRA + DPO training with +11.1% accuracy improvement
Browse files- README.md +165 -238
- adapter_config.json +6 -6
- adapter_model.safetensors +2 -2
- tokenizer.json +2 -2
- tokenizer_config.json +217 -7
- training_args.bin +2 -2
README.md
CHANGED
|
@@ -1,284 +1,211 @@
|
|
| 1 |
---
|
| 2 |
-
base_model: Qwen/Qwen3-4B-Instruct-2507
|
| 3 |
-
library_name: peft
|
| 4 |
license: apache-2.0
|
| 5 |
-
|
| 6 |
-
- en
|
| 7 |
tags:
|
| 8 |
- trading
|
| 9 |
-
- finance
|
| 10 |
- hyperliquid
|
| 11 |
-
-
|
| 12 |
-
- defi
|
| 13 |
- lora
|
|
|
|
| 14 |
- dpo
|
| 15 |
-
-
|
| 16 |
-
|
| 17 |
-
-
|
| 18 |
-
|
|
|
|
| 19 |
pipeline_tag: text-generation
|
|
|
|
| 20 |
---
|
| 21 |
|
| 22 |
-
# HyperLLM-
|
| 23 |
-
|
| 24 |
-
A
|
| 25 |
-
|
| 26 |
-
## Model
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
|
| 75 |
-
|
|
| 76 |
-
|
|
| 77 |
-
|
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
###
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
### DPO Hyperparameters
|
| 126 |
-
```python
|
| 127 |
-
{
|
| 128 |
-
"beta": 0.1,
|
| 129 |
-
"learning_rate": 5e-7,
|
| 130 |
-
"epochs": 2,
|
| 131 |
-
"batch_size": 4,
|
| 132 |
-
"max_length": 2048
|
| 133 |
-
}
|
| 134 |
-
```
|
| 135 |
-
|
| 136 |
-
### Training Data Distribution
|
| 137 |
-
|
| 138 |
-
**SFT (7,028 examples):**
|
| 139 |
-
|
| 140 |
-
| Category | Examples | % |
|
| 141 |
-
|----------|----------|---|
|
| 142 |
-
| General Instruction | 1,500 | 21.3% |
|
| 143 |
-
| Position Sizing | 800 | 11.4% |
|
| 144 |
-
| Parameter Validation | 800 | 11.4% |
|
| 145 |
-
| Adversarial Percentages | 600 | 8.5% |
|
| 146 |
-
| Multi-step Reasoning | 500 | 7.1% |
|
| 147 |
-
| Edge Cases | 400 | 5.7% |
|
| 148 |
-
| API Examples | 400 | 5.7% |
|
| 149 |
-
| Knowledge Q&A | 373 | 5.3% |
|
| 150 |
-
| Other | 1,655 | 23.6% |
|
| 151 |
-
|
| 152 |
-
**DPO (1,400 preference pairs):**
|
| 153 |
-
|
| 154 |
-
| Failure Mode | Pairs | % |
|
| 155 |
-
|--------------|-------|---|
|
| 156 |
-
| Excessive Leverage | 370 | 26.4% |
|
| 157 |
-
| Position Sizing | 330 | 23.6% |
|
| 158 |
-
| Percentage Confusion | 226 | 16.1% |
|
| 159 |
-
| Risk Violation | 195 | 13.9% |
|
| 160 |
-
| Policy Bypass | 140 | 10.0% |
|
| 161 |
-
| Uncertainty Caution | 139 | 9.9% |
|
| 162 |
|
| 163 |
## Usage
|
| 164 |
|
| 165 |
-
### With
|
| 166 |
-
|
| 167 |
```python
|
| 168 |
-
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 169 |
from peft import PeftModel
|
| 170 |
-
import
|
| 171 |
|
| 172 |
-
# Load base model
|
| 173 |
base_model = AutoModelForCausalLM.from_pretrained(
|
| 174 |
"Qwen/Qwen3-4B-Instruct-2507",
|
| 175 |
-
torch_dtype=
|
| 176 |
-
device_map="auto"
|
| 177 |
)
|
| 178 |
-
|
| 179 |
-
# Load LoRA adapter
|
| 180 |
model = PeftModel.from_pretrained(base_model, "UVLabs/HyperLLM-4b")
|
| 181 |
tokenizer = AutoTokenizer.from_pretrained("UVLabs/HyperLLM-4b")
|
| 182 |
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
|
| 188 |
```
|
| 189 |
|
| 190 |
-
### With
|
| 191 |
-
|
| 192 |
```python
|
| 193 |
-
from
|
| 194 |
-
from peft import PeftModel
|
| 195 |
-
import torch
|
| 196 |
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
)
|
|
|
|
| 202 |
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
quantization_config=bnb_config,
|
| 206 |
-
device_map="auto",
|
| 207 |
-
)
|
| 208 |
-
|
| 209 |
-
model = PeftModel.from_pretrained(base_model, "UVLabs/HyperLLM-4b")
|
| 210 |
-
tokenizer = AutoTokenizer.from_pretrained("UVLabs/HyperLLM-4b")
|
| 211 |
```
|
| 212 |
|
| 213 |
-
##
|
| 214 |
|
| 215 |
-
###
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
|
|
|
| 219 |
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
|
|
|
|
|
|
|
|
|
| 224 |
|
| 225 |
-
###
|
| 226 |
-
|
| 227 |
-
|
|
|
|
|
|
|
| 228 |
|
| 229 |
-
|
| 230 |
-
Valid sizes would be 1.23 or 1.24.
|
| 231 |
-
```
|
| 232 |
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
Model: I need clarification. "Increase by 50%" could mean:
|
| 238 |
-
1. Add 50% of current position (1.5x total)
|
| 239 |
-
2. Increase to 50% of account (absolute)
|
| 240 |
-
Which do you mean?
|
| 241 |
-
```
|
| 242 |
-
|
| 243 |
-
## Limitations
|
| 244 |
-
|
| 245 |
-
- **Multi-step Reasoning:** 39.3% accuracy - complex multi-step calculations are challenging for 4B model
|
| 246 |
-
- **API Structure:** 44.2% accuracy - improved but still needs work on exact JSON field names
|
| 247 |
-
- **Adversarial %:** 59.0% accuracy - better handling but still susceptible to tricky percentage phrasing
|
| 248 |
-
|
| 249 |
-
## Hardware Requirements
|
| 250 |
-
|
| 251 |
-
| Mode | VRAM | Notes |
|
| 252 |
-
|------|------|-------|
|
| 253 |
-
| bfloat16 | ~10GB | Full precision inference |
|
| 254 |
-
| 4-bit | ~4GB | Quantized inference |
|
| 255 |
-
| 8-bit | ~6GB | INT8 quantization |
|
| 256 |
-
|
| 257 |
-
## Training Hardware
|
| 258 |
-
|
| 259 |
-
- **Hardware:** NVIDIA A100 80GB SXM
|
| 260 |
-
- **SFT Duration:** ~20 minutes
|
| 261 |
-
- **DPO Duration:** ~17 minutes
|
| 262 |
-
- **Total Cost:** ~$1.50 (RunPod)
|
| 263 |
-
|
| 264 |
-
## Framework Versions
|
| 265 |
-
|
| 266 |
-
- PEFT: 0.18.1
|
| 267 |
-
- TRL: 0.29.0
|
| 268 |
-
- Transformers: 5.2.0
|
| 269 |
-
- PyTorch: 2.10.0
|
| 270 |
-
|
| 271 |
-
## License
|
| 272 |
-
|
| 273 |
-
Apache 2.0
|
| 274 |
|
| 275 |
## Citation
|
| 276 |
|
| 277 |
```bibtex
|
| 278 |
@misc{hyperllm2026,
|
| 279 |
-
title={HyperLLM:
|
| 280 |
author={UVLabs},
|
| 281 |
year={2026},
|
| 282 |
url={https://huggingface.co/UVLabs/HyperLLM-4b}
|
| 283 |
}
|
| 284 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
|
|
|
|
|
|
| 2 |
license: apache-2.0
|
| 3 |
+
base_model: Qwen/Qwen3-4B-Instruct-2507
|
|
|
|
| 4 |
tags:
|
| 5 |
- trading
|
|
|
|
| 6 |
- hyperliquid
|
| 7 |
+
- finance
|
|
|
|
| 8 |
- lora
|
| 9 |
+
- dora
|
| 10 |
- dpo
|
| 11 |
+
- peft
|
| 12 |
+
datasets:
|
| 13 |
+
- custom
|
| 14 |
+
language:
|
| 15 |
+
- en
|
| 16 |
pipeline_tag: text-generation
|
| 17 |
+
library_name: peft
|
| 18 |
---
|
| 19 |
|
| 20 |
+
# HyperLLM-4B v0.4
|
| 21 |
+
|
| 22 |
+
A fine-tuned Qwen3-4B model specialized for agentic trading on [Hyperliquid](https://hyperliquid.xyz). This model is trained to handle position sizing calculations, risk management, and trading operations.
|
| 23 |
+
|
| 24 |
+
## Model Details
|
| 25 |
+
|
| 26 |
+
| Property | Value |
|
| 27 |
+
|----------|-------|
|
| 28 |
+
| **Base Model** | [Qwen/Qwen3-4B-Instruct-2507](https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507) |
|
| 29 |
+
| **Parameters** | 4B (adapter only: ~134M trainable) |
|
| 30 |
+
| **Training Method** | SFT + DPO |
|
| 31 |
+
| **LoRA Rank** | 64 |
|
| 32 |
+
| **LoRA Alpha** | 128 |
|
| 33 |
+
| **Training Hardware** | NVIDIA A100-SXM4-80GB |
|
| 34 |
+
| **Version** | 0.4 |
|
| 35 |
+
|
| 36 |
+
## Training Techniques
|
| 37 |
+
|
| 38 |
+
### DoRA (Weight-Decomposed Low-Rank Adaptation)
|
| 39 |
+
v0.4 introduces **DoRA**, which decomposes weights into magnitude and direction components. This provides:
|
| 40 |
+
- Better fine-tuning stability
|
| 41 |
+
- Improved task performance (+3-4% over standard LoRA)
|
| 42 |
+
- More efficient parameter updates
|
| 43 |
+
|
| 44 |
+
### rsLoRA (Rank-Stabilized LoRA)
|
| 45 |
+
Uses rank-stabilized scaling factor (`lora_alpha / sqrt(r)` instead of `lora_alpha / r`) for:
|
| 46 |
+
- More stable training at higher ranks
|
| 47 |
+
- Better gradient flow
|
| 48 |
+
|
| 49 |
+
### DPO (Direct Preference Optimization)
|
| 50 |
+
Two-stage training pipeline:
|
| 51 |
+
1. **SFT Stage**: Supervised fine-tuning on 6,782 examples (40% general, 60% domain-specific)
|
| 52 |
+
2. **DPO Stage**: Preference alignment on 1,400 pairs targeting common failure modes
|
| 53 |
+
|
| 54 |
+
DPO pairs target these failure categories:
|
| 55 |
+
- Excessive leverage requests (26.4%)
|
| 56 |
+
- Position sizing errors (23.6%)
|
| 57 |
+
- Percentage confusion (16.1%)
|
| 58 |
+
- Risk policy violations (13.9%)
|
| 59 |
+
- Policy bypass attempts (10.0%)
|
| 60 |
+
- Uncertainty/caution calibration (9.9%)
|
| 61 |
+
|
| 62 |
+
## Performance (v0.3 → v0.4)
|
| 63 |
+
|
| 64 |
+
### Overall Accuracy
|
| 65 |
+
| Metric | v0.3 | v0.4 | Delta |
|
| 66 |
+
|--------|------|------|-------|
|
| 67 |
+
| **Graded Accuracy** | 67.4% | **78.5%** | **+11.1%** |
|
| 68 |
+
| Full Correct | 216/337 | 259/337 | +43 |
|
| 69 |
+
|
| 70 |
+
### Per-Category Results
|
| 71 |
+
|
| 72 |
+
| Category | v0.3 | v0.4 | Delta | Notes |
|
| 73 |
+
|----------|------|------|-------|-------|
|
| 74 |
+
| Parameter Validation | 93.3% | **100%** | +6.7% | Perfect score |
|
| 75 |
+
| Edge Cases | 92.5% | **95.0%** | +2.5% | |
|
| 76 |
+
| General Capability | 89.1% | **92.7%** | +3.6% | No catastrophic forgetting |
|
| 77 |
+
| Position Sizing | 83.3% | **86.7%** | +3.4% | |
|
| 78 |
+
| Adversarial % | 53.5% | **82.0%** | **+28.5%** | Major improvement |
|
| 79 |
+
| Trading Mechanics | 80.0% | 80.0% | 0% | Maintained |
|
| 80 |
+
| Multi-step Reasoning | 31.3% | **41.0%** | **+9.7%** | |
|
| 81 |
+
| Factual | 20.0% | 33.3% | +13.3% | Below target |
|
| 82 |
+
| API Structure | 27.5% | 10.8% | -16.7% | **Regression** |
|
| 83 |
+
|
| 84 |
+
### Key Improvements in v0.4
|
| 85 |
+
|
| 86 |
+
1. **Adversarial Percentage Handling (+28.5%)**
|
| 87 |
+
- Model now correctly distinguishes between "risk 2%", "allocate 2%", and "2x leverage"
|
| 88 |
+
- DPO pairs specifically targeting percentage confusion were highly effective
|
| 89 |
+
|
| 90 |
+
2. **Multi-step Reasoning (+9.7%)**
|
| 91 |
+
- Model shows intermediate calculation steps
|
| 92 |
+
- Better at complex position sizing scenarios
|
| 93 |
+
|
| 94 |
+
3. **General Capability Retention (+3.6%)**
|
| 95 |
+
- 40% general instruction mix prevented catastrophic forgetting
|
| 96 |
+
- Base model reasoning capabilities preserved
|
| 97 |
+
|
| 98 |
+
4. **Perfect Parameter Validation (100%)**
|
| 99 |
+
- Tick sizes, lot sizes, precision rules mastered
|
| 100 |
+
|
| 101 |
+
## Known Issues & Limitations
|
| 102 |
+
|
| 103 |
+
### API Structure Regression (10.8%)
|
| 104 |
+
The model has limited knowledge of Hyperliquid-specific API fields:
|
| 105 |
+
- Doesn't know abbreviated field names (`a`=asset, `b`=isBuy, `s`=size)
|
| 106 |
+
- May use incorrect base URL (`.net` vs `.xyz`)
|
| 107 |
+
- Invents non-existent endpoints
|
| 108 |
+
|
| 109 |
+
**Mitigation**: Use explicit API documentation in prompts or constrained decoding.
|
| 110 |
+
|
| 111 |
+
### Factual Knowledge Gaps (33.3%)
|
| 112 |
+
Some Hyperliquid-specific facts are unreliable:
|
| 113 |
+
- API URLs, WebSocket endpoints
|
| 114 |
+
- Time-in-force options (ALO, IOC, GTC)
|
| 115 |
+
- Fee structures, unstaking duration
|
| 116 |
+
|
| 117 |
+
**Mitigation**: Provide these facts in the system prompt for critical operations.
|
| 118 |
+
|
| 119 |
+
### Multi-step Final Answer Extraction
|
| 120 |
+
The model sometimes returns an intermediate value instead of the final answer. When the calculation steps are shown correctly but the final answer is wrong:
|
| 121 |
+
- Verify the calculation steps manually
|
| 122 |
+
- Extract the correct value from the reasoning
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
|
| 124 |
## Usage
|
| 125 |
|
| 126 |
+
### With PEFT
|
|
|
|
| 127 |
```python
|
|
|
|
| 128 |
from peft import PeftModel
|
| 129 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 130 |
|
|
|
|
| 131 |
base_model = AutoModelForCausalLM.from_pretrained(
|
| 132 |
"Qwen/Qwen3-4B-Instruct-2507",
|
| 133 |
+
torch_dtype="auto",
|
| 134 |
+
device_map="auto"
|
| 135 |
)
|
|
|
|
|
|
|
| 136 |
model = PeftModel.from_pretrained(base_model, "UVLabs/HyperLLM-4b")
|
| 137 |
tokenizer = AutoTokenizer.from_pretrained("UVLabs/HyperLLM-4b")
|
| 138 |
|
| 139 |
+
messages = [
|
| 140 |
+
{"role": "system", "content": "You are a trading assistant for Hyperliquid."},
|
| 141 |
+
{"role": "user", "content": "I have $10,000 and want to risk 2%. Entry at $100, stop at $95. What's my position size?"}
|
| 142 |
+
]
|
| 143 |
+
|
| 144 |
+
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
| 145 |
+
inputs = tokenizer(text, return_tensors="pt").to(model.device)
|
| 146 |
+
outputs = model.generate(**inputs, max_new_tokens=256, temperature=0.1)
|
| 147 |
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
|
| 148 |
```
|
| 149 |
|
| 150 |
+
### With vLLM (Recommended for Production)
|
|
|
|
| 151 |
```python
|
| 152 |
+
from vllm import LLM, SamplingParams
|
|
|
|
|
|
|
| 153 |
|
| 154 |
+
llm = LLM(
|
| 155 |
+
model="Qwen/Qwen3-4B-Instruct-2507",
|
| 156 |
+
enable_lora=True,
|
| 157 |
+
max_lora_rank=64
|
| 158 |
)
|
| 159 |
+
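from vllm.lora.request import LoRARequest  # vLLM's LLM class has no load_lora(); adapters are passed per request
# NOTE: this adapter is trained with DoRA (use_dora=true); confirm your vLLM version can load DoRA adapters, otherwise merge the adapter into the base model first.
|
| 160 |
|
| 161 |
+
sampling_params = SamplingParams(temperature=0.1, max_tokens=512)
|
| 162 |
+
outputs = llm.generate(["Calculate position size..."], sampling_params, lora_request=LoRARequest("hyperllm", 1, "UVLabs/HyperLLM-4b"))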
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
```
|
| 164 |
|
| 165 |
+
## Training Details
|
| 166 |
|
| 167 |
+
### SFT Phase
|
| 168 |
+
- **Dataset**: 6,782 examples (6,103 train / 679 val)
|
| 169 |
+
- **Epochs**: 1.57 (early stopping triggered)
|
| 170 |
+
- **Final Eval Loss**: 0.1324
|
| 171 |
+
- **Runtime**: 66.8 minutes
|
| 172 |
|
| 173 |
+
### DPO Phase
|
| 174 |
+
- **Dataset**: 1,400 preference pairs (1,260 train / 140 val)
|
| 175 |
+
- **Beta**: 0.05 (gentler KL penalty than v0.3's 0.1)
|
| 176 |
+
- **Epochs**: 2.0
|
| 177 |
+
- **Final Reward Accuracy**: 100%
|
| 178 |
+
- **Reward Margin**: 11.30
|
| 179 |
+
- **Runtime**: 29.8 minutes
|
| 180 |
|
| 181 |
+
### Infrastructure
|
| 182 |
+
- Unsloth 2x acceleration
|
| 183 |
+
- Liger Kernel optimizations
|
| 184 |
+
- TF32 enabled for A100
|
| 185 |
+
- Padding-free training
|
| 186 |
|
| 187 |
+
## Roadmap for v0.5
|
|
|
|
|
|
|
| 188 |
|
| 189 |
+
1. **Fix API Structure**: Add 300+ API-specific training examples with correct field mappings
|
| 190 |
+
2. **Improve Factual Knowledge**: Implement fact repetition (50+ variations per fact)
|
| 191 |
+
3. **Better Final Answer Extraction**: Enforce "Final Answer: X" format
|
| 192 |
+
4. **Market Knowledge Injection**: Add technical indicator and price action knowledge
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
|
| 194 |
## Citation
|
| 195 |
|
| 196 |
```bibtex
|
| 197 |
@misc{hyperllm2026,
|
| 198 |
+
title={HyperLLM: Fine-tuned LLM for Agentic Trading on Hyperliquid},
|
| 199 |
author={UVLabs},
|
| 200 |
year={2026},
|
| 201 |
url={https://huggingface.co/UVLabs/HyperLLM-4b}
|
| 202 |
}
|
| 203 |
```
|
| 204 |
+
|
| 205 |
+
## License
|
| 206 |
+
|
| 207 |
+
Apache 2.0
|
| 208 |
+
|
| 209 |
+
## Disclaimer
|
| 210 |
+
|
| 211 |
+
This model is for research and educational purposes. It is not financial advice. Always verify calculations and consult qualified professionals before making trading decisions. The authors are not responsible for any losses incurred from using this model.
|
adapter_config.json
CHANGED
|
@@ -29,18 +29,18 @@
|
|
| 29 |
"rank_pattern": {},
|
| 30 |
"revision": null,
|
| 31 |
"target_modules": [
|
| 32 |
-
"
|
| 33 |
-
"gate_proj",
|
| 34 |
-
"o_proj",
|
| 35 |
"q_proj",
|
|
|
|
|
|
|
| 36 |
"k_proj",
|
| 37 |
-
"
|
| 38 |
-
"
|
| 39 |
],
|
| 40 |
"target_parameters": null,
|
| 41 |
"task_type": "CAUSAL_LM",
|
| 42 |
"trainable_token_indices": null,
|
| 43 |
-
"use_dora":
|
| 44 |
"use_qalora": false,
|
| 45 |
"use_rslora": true
|
| 46 |
}
|
|
|
|
| 29 |
"rank_pattern": {},
|
| 30 |
"revision": null,
|
| 31 |
"target_modules": [
|
| 32 |
+
"down_proj",
|
|
|
|
|
|
|
| 33 |
"q_proj",
|
| 34 |
+
"up_proj",
|
| 35 |
+
"gate_proj",
|
| 36 |
"k_proj",
|
| 37 |
+
"v_proj",
|
| 38 |
+
"o_proj"
|
| 39 |
],
|
| 40 |
"target_parameters": null,
|
| 41 |
"task_type": "CAUSAL_LM",
|
| 42 |
"trainable_token_indices": null,
|
| 43 |
+
"use_dora": true,
|
| 44 |
"use_qalora": false,
|
| 45 |
"use_rslora": true
|
| 46 |
}
|
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2eddb001a78738266b300b45fb2f9cd2116bccb31ea348b681a84c91dd8313bc
|
| 3 |
+
size 533009160
|
tokenizer.json
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
|
| 3 |
+
size 11422654
|
tokenizer_config.json
CHANGED
|
@@ -1,11 +1,217 @@
|
|
| 1 |
{
|
|
|
|
| 2 |
"add_prefix_space": false,
|
| 3 |
-
"
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
"<|im_start|>",
|
| 10 |
"<|im_end|>",
|
| 11 |
"<|object_ref_start|>",
|
|
@@ -20,7 +226,11 @@
|
|
| 20 |
"<|image_pad|>",
|
| 21 |
"<|video_pad|>"
|
| 22 |
],
|
| 23 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
"model_max_length": 1010000,
|
| 25 |
"pad_token": "<|endoftext|>",
|
| 26 |
"split_special_tokens": false,
|
|
|
|
| 1 |
{
|
| 2 |
+
"add_bos_token": false,
|
| 3 |
"add_prefix_space": false,
|
| 4 |
+
"added_tokens_decoder": {
|
| 5 |
+
"151643": {
|
| 6 |
+
"content": "<|endoftext|>",
|
| 7 |
+
"lstrip": false,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false,
|
| 11 |
+
"special": true
|
| 12 |
+
},
|
| 13 |
+
"151644": {
|
| 14 |
+
"content": "<|im_start|>",
|
| 15 |
+
"lstrip": false,
|
| 16 |
+
"normalized": false,
|
| 17 |
+
"rstrip": false,
|
| 18 |
+
"single_word": false,
|
| 19 |
+
"special": true
|
| 20 |
+
},
|
| 21 |
+
"151645": {
|
| 22 |
+
"content": "<|im_end|>",
|
| 23 |
+
"lstrip": false,
|
| 24 |
+
"normalized": false,
|
| 25 |
+
"rstrip": false,
|
| 26 |
+
"single_word": false,
|
| 27 |
+
"special": true
|
| 28 |
+
},
|
| 29 |
+
"151646": {
|
| 30 |
+
"content": "<|object_ref_start|>",
|
| 31 |
+
"lstrip": false,
|
| 32 |
+
"normalized": false,
|
| 33 |
+
"rstrip": false,
|
| 34 |
+
"single_word": false,
|
| 35 |
+
"special": true
|
| 36 |
+
},
|
| 37 |
+
"151647": {
|
| 38 |
+
"content": "<|object_ref_end|>",
|
| 39 |
+
"lstrip": false,
|
| 40 |
+
"normalized": false,
|
| 41 |
+
"rstrip": false,
|
| 42 |
+
"single_word": false,
|
| 43 |
+
"special": true
|
| 44 |
+
},
|
| 45 |
+
"151648": {
|
| 46 |
+
"content": "<|box_start|>",
|
| 47 |
+
"lstrip": false,
|
| 48 |
+
"normalized": false,
|
| 49 |
+
"rstrip": false,
|
| 50 |
+
"single_word": false,
|
| 51 |
+
"special": true
|
| 52 |
+
},
|
| 53 |
+
"151649": {
|
| 54 |
+
"content": "<|box_end|>",
|
| 55 |
+
"lstrip": false,
|
| 56 |
+
"normalized": false,
|
| 57 |
+
"rstrip": false,
|
| 58 |
+
"single_word": false,
|
| 59 |
+
"special": true
|
| 60 |
+
},
|
| 61 |
+
"151650": {
|
| 62 |
+
"content": "<|quad_start|>",
|
| 63 |
+
"lstrip": false,
|
| 64 |
+
"normalized": false,
|
| 65 |
+
"rstrip": false,
|
| 66 |
+
"single_word": false,
|
| 67 |
+
"special": true
|
| 68 |
+
},
|
| 69 |
+
"151651": {
|
| 70 |
+
"content": "<|quad_end|>",
|
| 71 |
+
"lstrip": false,
|
| 72 |
+
"normalized": false,
|
| 73 |
+
"rstrip": false,
|
| 74 |
+
"single_word": false,
|
| 75 |
+
"special": true
|
| 76 |
+
},
|
| 77 |
+
"151652": {
|
| 78 |
+
"content": "<|vision_start|>",
|
| 79 |
+
"lstrip": false,
|
| 80 |
+
"normalized": false,
|
| 81 |
+
"rstrip": false,
|
| 82 |
+
"single_word": false,
|
| 83 |
+
"special": true
|
| 84 |
+
},
|
| 85 |
+
"151653": {
|
| 86 |
+
"content": "<|vision_end|>",
|
| 87 |
+
"lstrip": false,
|
| 88 |
+
"normalized": false,
|
| 89 |
+
"rstrip": false,
|
| 90 |
+
"single_word": false,
|
| 91 |
+
"special": true
|
| 92 |
+
},
|
| 93 |
+
"151654": {
|
| 94 |
+
"content": "<|vision_pad|>",
|
| 95 |
+
"lstrip": false,
|
| 96 |
+
"normalized": false,
|
| 97 |
+
"rstrip": false,
|
| 98 |
+
"single_word": false,
|
| 99 |
+
"special": true
|
| 100 |
+
},
|
| 101 |
+
"151655": {
|
| 102 |
+
"content": "<|image_pad|>",
|
| 103 |
+
"lstrip": false,
|
| 104 |
+
"normalized": false,
|
| 105 |
+
"rstrip": false,
|
| 106 |
+
"single_word": false,
|
| 107 |
+
"special": true
|
| 108 |
+
},
|
| 109 |
+
"151656": {
|
| 110 |
+
"content": "<|video_pad|>",
|
| 111 |
+
"lstrip": false,
|
| 112 |
+
"normalized": false,
|
| 113 |
+
"rstrip": false,
|
| 114 |
+
"single_word": false,
|
| 115 |
+
"special": true
|
| 116 |
+
},
|
| 117 |
+
"151657": {
|
| 118 |
+
"content": "<tool_call>",
|
| 119 |
+
"lstrip": false,
|
| 120 |
+
"normalized": false,
|
| 121 |
+
"rstrip": false,
|
| 122 |
+
"single_word": false,
|
| 123 |
+
"special": false
|
| 124 |
+
},
|
| 125 |
+
"151658": {
|
| 126 |
+
"content": "</tool_call>",
|
| 127 |
+
"lstrip": false,
|
| 128 |
+
"normalized": false,
|
| 129 |
+
"rstrip": false,
|
| 130 |
+
"single_word": false,
|
| 131 |
+
"special": false
|
| 132 |
+
},
|
| 133 |
+
"151659": {
|
| 134 |
+
"content": "<|fim_prefix|>",
|
| 135 |
+
"lstrip": false,
|
| 136 |
+
"normalized": false,
|
| 137 |
+
"rstrip": false,
|
| 138 |
+
"single_word": false,
|
| 139 |
+
"special": false
|
| 140 |
+
},
|
| 141 |
+
"151660": {
|
| 142 |
+
"content": "<|fim_middle|>",
|
| 143 |
+
"lstrip": false,
|
| 144 |
+
"normalized": false,
|
| 145 |
+
"rstrip": false,
|
| 146 |
+
"single_word": false,
|
| 147 |
+
"special": false
|
| 148 |
+
},
|
| 149 |
+
"151661": {
|
| 150 |
+
"content": "<|fim_suffix|>",
|
| 151 |
+
"lstrip": false,
|
| 152 |
+
"normalized": false,
|
| 153 |
+
"rstrip": false,
|
| 154 |
+
"single_word": false,
|
| 155 |
+
"special": false
|
| 156 |
+
},
|
| 157 |
+
"151662": {
|
| 158 |
+
"content": "<|fim_pad|>",
|
| 159 |
+
"lstrip": false,
|
| 160 |
+
"normalized": false,
|
| 161 |
+
"rstrip": false,
|
| 162 |
+
"single_word": false,
|
| 163 |
+
"special": false
|
| 164 |
+
},
|
| 165 |
+
"151663": {
|
| 166 |
+
"content": "<|repo_name|>",
|
| 167 |
+
"lstrip": false,
|
| 168 |
+
"normalized": false,
|
| 169 |
+
"rstrip": false,
|
| 170 |
+
"single_word": false,
|
| 171 |
+
"special": false
|
| 172 |
+
},
|
| 173 |
+
"151664": {
|
| 174 |
+
"content": "<|file_sep|>",
|
| 175 |
+
"lstrip": false,
|
| 176 |
+
"normalized": false,
|
| 177 |
+
"rstrip": false,
|
| 178 |
+
"single_word": false,
|
| 179 |
+
"special": false
|
| 180 |
+
},
|
| 181 |
+
"151665": {
|
| 182 |
+
"content": "<tool_response>",
|
| 183 |
+
"lstrip": false,
|
| 184 |
+
"normalized": false,
|
| 185 |
+
"rstrip": false,
|
| 186 |
+
"single_word": false,
|
| 187 |
+
"special": false
|
| 188 |
+
},
|
| 189 |
+
"151666": {
|
| 190 |
+
"content": "</tool_response>",
|
| 191 |
+
"lstrip": false,
|
| 192 |
+
"normalized": false,
|
| 193 |
+
"rstrip": false,
|
| 194 |
+
"single_word": false,
|
| 195 |
+
"special": false
|
| 196 |
+
},
|
| 197 |
+
"151667": {
|
| 198 |
+
"content": "<think>",
|
| 199 |
+
"lstrip": false,
|
| 200 |
+
"normalized": false,
|
| 201 |
+
"rstrip": false,
|
| 202 |
+
"single_word": false,
|
| 203 |
+
"special": false
|
| 204 |
+
},
|
| 205 |
+
"151668": {
|
| 206 |
+
"content": "</think>",
|
| 207 |
+
"lstrip": false,
|
| 208 |
+
"normalized": false,
|
| 209 |
+
"rstrip": false,
|
| 210 |
+
"single_word": false,
|
| 211 |
+
"special": false
|
| 212 |
+
}
|
| 213 |
+
},
|
| 214 |
+
"additional_special_tokens": [
|
| 215 |
"<|im_start|>",
|
| 216 |
"<|im_end|>",
|
| 217 |
"<|object_ref_start|>",
|
|
|
|
| 226 |
"<|image_pad|>",
|
| 227 |
"<|video_pad|>"
|
| 228 |
],
|
| 229 |
+
"bos_token": null,
|
| 230 |
+
"clean_up_tokenization_spaces": false,
|
| 231 |
+
"eos_token": "<|im_end|>",
|
| 232 |
+
"errors": "replace",
|
| 233 |
+
"extra_special_tokens": {},
|
| 234 |
"model_max_length": 1010000,
|
| 235 |
"pad_token": "<|endoftext|>",
|
| 236 |
"split_special_tokens": false,
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d06eeb207df108c40af6debaace45ca51e49dd0692558a3f5615490e9b673a27
|
| 3 |
+
size 6801
|