Qwen3.5 9B Coder

This is an experimental fine-tune on a mix of traces from many different models. Reasoning was left untouched (the training script sets `train_on_reasoning=False`).

Total train time: ~4 hours

Training Script

Training Script
import os
from unsloth import FastModel
import torch
from trl import SFTConfig, SFTTrainer
from teich import mask_data, prepare_data

# --- Run configuration -------------------------------------------------------
# Sequence-length cap shared by model loading, data preparation and SFTConfig.
MAX_SEQ_LEN: int = 32768
# Base checkpoint that the adapter is trained on top of.
MODEL_NAME: str = "Qwen/Qwen3.5-9B"
# Directory handed to the trainer as its output_dir.
OUTPUT_DIR: str = "/content/drive/MyDrive/Colab/outputs-qwen-tool-sft"
# Hub repo for the merged model; the adapter is pushed to this id plus "-LoRA".
HUB_REPO_ID: str = "armand0e/Qwen3.5-9B-Coder"
# Jinja chat template file; a falsy value skips the template override below.
CHAT_TEMPLATE_PATH: str = "qwen3.5-chat-template.jinja"

# Hub access token read from the environment; an empty string when HF_TOKEN
# is not set.
HF_TOKEN: str = os.getenv("HF_TOKEN", "")

# Load the base checkpoint and its tokenizer through Unsloth. Both quantized
# loading modes and full fine-tuning are switched off.
base_load_options = {
    "model_name": MODEL_NAME,
    "max_seq_length": MAX_SEQ_LEN,
    "load_in_4bit": False,
    "load_in_8bit": False,
    "full_finetuning": False,
    "token": HF_TOKEN,
}
model, tokenizer = FastModel.from_pretrained(**base_load_options)

# Optionally replace the chat template with the Jinja file named by
# CHAT_TEMPLATE_PATH (skipped entirely when that path is falsy).
if CHAT_TEMPLATE_PATH:
    with open(CHAT_TEMPLATE_PATH, "r", encoding="utf-8") as template_file:
        template_source = template_file.read()

    tokenizer.chat_template = template_source

    # The loaded object may wrap an inner `.tokenizer` (presumably a
    # processor-style wrapper; confirm); if so, keep its template in sync.
    inner_tokenizer = getattr(tokenizer, "tokenizer", None)
    if inner_tokenizer is not None:
        inner_tokenizer.chat_template = template_source

# Attach LoRA adapters to the loaded model.
model = FastModel.get_peft_model(
    model,
    # Which parts of the network receive adapters: this is a text-only run,
    # so vision layers are excluded while the language layers, attention
    # modules and MLP modules are all included.
    finetune_vision_layers=False,
    finetune_language_layers=True,
    finetune_attention_modules=True,
    finetune_mlp_modules=True,
    # Adapter shape: rank and alpha are both 32, with no dropout and no
    # bias terms.
    r=32,
    lora_alpha=32,
    lora_dropout=0,
    bias="none",
    # Fixed seed for the adapter setup.
    random_state=3407,
)

# Label -> Hub dataset repo for every trace collection in the training mix.
trace_sources = {
    # NOTE: this entry contains a typo made when the run was configured; as a
    # result the model was NOT trained on the qwen3.7-max traces.
    "qwen3.7-max": "armand0e/qwen3.7-max",
    "chat": "TeichAI/claude-4.5-opus-high-reasoning-250x",
    "opus-pi-agent": "armand0e/badlogicgames-pi-mono-opus-filtered",
    "kimi-k2.6-claude-code": "armand0e/kimi-k2.6-claude-code-traces",
    "chat-2": "TeichAI/Claude-Opus-4.6-Reasoning-887x",
    "minimax-m3-claude-code": "armand0e/minimax-m3-claude-code-traces",
    "more-opus": "armand0e/claude-opus-4.8-pi-traces",
}

# Options forwarded to the chat template when conversations are rendered.
template_options = {"enable_thinking": False, "preserve_thinking": True}

# Build the tokenized training split with Teich. Each label maps to a
# {"source": repo} spec, in the same order as the mapping above.
train_dataset = prepare_data(
    {label: {"source": repo_id} for label, repo_id in trace_sources.items()},
    tokenizer,
    split="train",
    hf_token=HF_TOKEN,
    chat_template_kwargs=template_options,
    max_length=MAX_SEQ_LEN,
    oversized_policy="trim_followups",
    tokenize=True,
    strict=True,
)

# Hyperparameters for the single-epoch SFT run.
sft_args = SFTConfig(
    # Data handling
    dataset_text_field="text",
    dataset_num_proc=1,
    max_length=MAX_SEQ_LEN,
    packing=False,
    # Batching: one sequence per device, gradients accumulated over 8 steps.
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    # Learning-rate schedule
    num_train_epochs=1,
    learning_rate=2e-4,
    lr_scheduler_type="linear",
    warmup_steps=5,
    # Optimizer
    optim="adamw_8bit",
    weight_decay=0.01,
    # max_grad_norm=0.3,  # override disabled; the library default applies
    # Logging and checkpointing
    logging_steps=1,
    save_strategy="epoch",
    save_total_limit=3,
    output_dir=OUTPUT_DIR,
    report_to="none",
    # Reproducibility
    seed=3407,
)

# Trainer over the prepared dataset; no evaluation set is used.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=None,
    args=sft_args,
)

# Apply Teich's masking to the trainer. The switches select which parts of
# each conversation are trained on: final answers and tool content are on,
# reasoning is off.
loss_targets = {
    "train_on_reasoning": False,
    "train_on_final_answers": True,
    "train_on_tools": True,
}
trainer = mask_data(trainer, tokenizer=tokenizer, **loss_targets)

# Print a preview of the masked training data before starting.
dataset_preview = trainer.train_dataset.preview()
print(dataset_preview)

# Train from scratch rather than resuming from an earlier checkpoint.
trainer_stats = trainer.train(resume_from_checkpoint=False)

# Publish the adapter weights and tokenizer to the "-LoRA" companion repo.
lora_repo_id = f"{HUB_REPO_ID}-LoRA"
for artifact in (model, tokenizer):
    artifact.push_to_hub(lora_repo_id, token=HF_TOKEN)

# Publish the merged 16-bit model to the main repo.
model.push_to_hub_merged(
    HUB_REPO_ID,
    tokenizer,
    save_method="merged_16bit",
    token=HF_TOKEN,
)

The data for this model was easily formatted and masked with Teich.

  • Developed by: armand0e
  • License: apache-2.0
  • Finetuned from model: Qwen/Qwen3.5-9B

This qwen3_5 model was trained 2x faster with Unsloth and Hugging Face's TRL library.

Downloads last month
419
Safetensors
Model size
10B params
Tensor type
BF16
·
F32
·
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support

Model tree for armand0e/Qwen3.5-9B-Coder

Finetuned
Qwen/Qwen3.5-9B
Finetuned
(368)
this model
Merges
4 models
Quantizations
2 models

Datasets used to train armand0e/Qwen3.5-9B-Coder