TeichAI/Claude-Opus-4.6-Reasoning-887x
Viewer • Updated • 886 • 2.05k • 86
How to use armand0e/Qwen3.5-9B-Coder with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline
pipe = pipeline("image-text-to-text", model="armand0e/Qwen3.5-9B-Coder")

# One user turn that pairs an image (referenced by URL) with a text question.
messages = [
    {
        "role": "user",
        "content": [
            {
                "type": "image",
                "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG",
            },
            {
                "type": "text",
                "text": "What animal is on the candy?",
            },
        ],
    },
]
pipe(text=messages)

# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText
# The processor and the model are both loaded from the same Hub repository.
processor = AutoProcessor.from_pretrained("armand0e/Qwen3.5-9B-Coder")
model = AutoModelForImageTextToText.from_pretrained("armand0e/Qwen3.5-9B-Coder")

# One user turn that pairs an image (referenced by URL) with a text question.
messages = [
    {
        "role": "user",
        "content": [
            {
                "type": "image",
                "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG",
            },
            {
                "type": "text",
                "text": "What animal is on the candy?",
            },
        ],
    },
]

# Apply the chat template with tokenization enabled, request PyTorch tensors
# in a dict, and move the result to the device the model lives on.
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

# Generate at most 40 new tokens.
outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
How to use armand0e/Qwen3.5-9B-Coder with vLLM:
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "armand0e/Qwen3.5-9B-Coder"

# Call the server using curl (OpenAI-compatible API).
# The JSON payload lives inside one single-quoted argument, so its lines are
# kept flush-left to leave the bytes that are sent unchanged.
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
"model": "armand0e/Qwen3.5-9B-Coder",
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "Describe this image in one sentence."
},
{
"type": "image_url",
"image_url": {
"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
}
}
]
}
]
}'

# Run the model with Docker Model Runner (a separate command; it must not be
# glued to the closing quote of the curl payload above):
docker model run hf.co/armand0e/Qwen3.5-9B-Coder
How to use armand0e/Qwen3.5-9B-Coder with SGLang:
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "armand0e/Qwen3.5-9B-Coder" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API).
# The JSON payload lives inside one single-quoted argument, so its lines are
# kept flush-left to leave the bytes that are sent unchanged.
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
"model": "armand0e/Qwen3.5-9B-Coder",
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "Describe this image in one sentence."
},
{
"type": "image_url",
"image_url": {
"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
}
}
]
}
]
}'

# Start the SGLang server from the lmsysorg/sglang Docker image (a separate
# command; it must not be glued to the closing quote of the curl payload).
# <secret> is a placeholder for the value of HF_TOKEN.
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "armand0e/Qwen3.5-9B-Coder" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
-H "Content-Type: application/json" \
--data '{
"model": "armand0e/Qwen3.5-9B-Coder",
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "Describe this image in one sentence."
},
{
"type": "image_url",
"image_url": {
"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
}
}
]
}
]
}'
How to use armand0e/Qwen3.5-9B-Coder with Unsloth Studio:
# Option 1: install with a POSIX shell (curl | sh).
curl -fsSL https://unsloth.ai/install.sh | sh
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for armand0e/Qwen3.5-9B-Coder to start chatting

# Option 2: install with PowerShell (irm | iex).
irm https://unsloth.ai/install.ps1 | iex
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for armand0e/Qwen3.5-9B-Coder to start chatting

# Option 3: hosted Space.
# No setup required
# Open https://huggingface.co/spaces/unsloth/studio in your browser
# Search for armand0e/Qwen3.5-9B-Coder to start chatting
pip install unsloth
from unsloth import FastModel
model, tokenizer = FastModel.from_pretrained(
model_name="armand0e/Qwen3.5-9B-Coder",
max_seq_length=2048,
)
How to use armand0e/Qwen3.5-9B-Coder with Docker Model Runner:
docker model run hf.co/armand0e/Qwen3.5-9B-Coder
This is an experimental finetune on a mix of many traces from many different models. Reasoning was left untouched.
Total train time: ~4 hours
import os
from unsloth import FastModel
import torch
from trl import SFTConfig, SFTTrainer
from teich import mask_data, prepare_data
# ---------------------------------------------------------------------------
# Fine-tuning script: load the base model, attach LoRA adapters, build and
# mask the training data, run one epoch of SFT, then push the adapter and the
# merged 16-bit weights to the Hub.
# ---------------------------------------------------------------------------

# Sequence-length budget shared by model loading, data preparation and SFTConfig.
MAX_SEQ_LEN = 32768
MODEL_NAME = "Qwen/Qwen3.5-9B"
OUTPUT_DIR = "/content/drive/MyDrive/Colab/outputs-qwen-tool-sft"
HUB_REPO_ID = "armand0e/Qwen3.5-9B-Coder"
# Read from the environment; falls back to an empty string when unset.
HF_TOKEN = os.environ.get("HF_TOKEN", "")
# Set to a falsy value to keep the chat template the tokenizer already has.
CHAT_TEMPLATE_PATH = "qwen3.5-chat-template.jinja"

# Quantized loading and full fine-tuning are both disabled; adapters are
# attached further down via get_peft_model.
model, tokenizer = FastModel.from_pretrained(
    model_name=MODEL_NAME,
    max_seq_length=MAX_SEQ_LEN,
    load_in_4bit=False,
    load_in_8bit=False,
    full_finetuning=False,
    token=HF_TOKEN,
)

if CHAT_TEMPLATE_PATH:
    with open(CHAT_TEMPLATE_PATH, "r", encoding="utf-8") as f:
        custom_chat_template = f.read()
    tokenizer.chat_template = custom_chat_template
    # When the loaded object exposes an inner `.tokenizer`, set the template
    # there as well so both objects agree.
    if hasattr(tokenizer, "tokenizer") and tokenizer.tokenizer is not None:
        tokenizer.tokenizer.chat_template = custom_chat_template

model = FastModel.get_peft_model(
    model,
    finetune_vision_layers=False,  # Turn off for just text!
    finetune_language_layers=True,  # Should leave on!
    finetune_attention_modules=True,  # Attention good for GRPO
    finetune_mlp_modules=True,  # Should leave on always!
    r=32,  # Larger = higher accuracy, but might overfit
    lora_alpha=32,  # Recommended alpha == r at least
    lora_dropout=0,
    bias="none",
    random_state=3407,
)

# Each entry maps a label to the Hub dataset it is read from.
train_dataset = prepare_data(
    {
        "qwen3.7-max": {
            # NOTE: per the author, this source id contains a typo, so the
            # released model was NOT trained on the qwen3.7-max traces. It is
            # kept as-is to record what was actually run.
            "source": "armand0e/qwen3.7-max",
        },
        "chat": {
            "source": "TeichAI/claude-4.5-opus-high-reasoning-250x",
        },
        "opus-pi-agent": {
            "source": "armand0e/badlogicgames-pi-mono-opus-filtered",
        },
        "kimi-k2.6-claude-code": {
            "source": "armand0e/kimi-k2.6-claude-code-traces",
        },
        "chat-2": {
            "source": "TeichAI/Claude-Opus-4.6-Reasoning-887x",
        },
        "minimax-m3-claude-code": {
            "source": "armand0e/minimax-m3-claude-code-traces",
        },
        "more-opus": {
            "source": "armand0e/claude-opus-4.8-pi-traces",
        },
    },
    tokenizer,
    split="train",
    hf_token=HF_TOKEN,
    chat_template_kwargs={"enable_thinking": False, "preserve_thinking": True},
    max_length=MAX_SEQ_LEN,
    oversized_policy="trim_followups",
    tokenize=True,
    strict=True,
)

# Batch size 1 with 8 gradient-accumulation steps, for a single epoch.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=None,
    args=SFTConfig(
        dataset_text_field="text",
        dataset_num_proc=1,
        max_length=MAX_SEQ_LEN,
        packing=False,
        per_device_train_batch_size=1,
        gradient_accumulation_steps=8,
        warmup_steps=5,
        num_train_epochs=1,
        learning_rate=2e-4,
        logging_steps=1,
        save_strategy="epoch",
        save_total_limit=3,
        optim="adamw_8bit",
        weight_decay=0.01,
        # max_grad_norm=0.3,
        lr_scheduler_type="linear",
        output_dir=OUTPUT_DIR,
        seed=3407,
        report_to="none",
    ),
)

# NOTE(review): going by the flag names, loss is applied to final answers and
# tool calls but not to reasoning -- confirm against teich's documentation.
trainer = mask_data(
    trainer,
    tokenizer=tokenizer,
    train_on_reasoning=False,
    train_on_final_answers=True,
    train_on_tools=True,
)
print(trainer.train_dataset.preview())

# Always start a fresh run instead of resuming from a checkpoint in OUTPUT_DIR.
trainer_stats = trainer.train(resume_from_checkpoint=False)

# Publish the adapter and tokenizer under "<HUB_REPO_ID>-LoRA", then the merged
# 16-bit model under HUB_REPO_ID itself.
model.push_to_hub(f"{HUB_REPO_ID}-LoRA", token=HF_TOKEN)
tokenizer.push_to_hub(f"{HUB_REPO_ID}-LoRA", token=HF_TOKEN)
model.push_to_hub_merged(HUB_REPO_ID, tokenizer, save_method="merged_16bit", token=HF_TOKEN)
The data for this model was easily formatted and masked with Teich.
This qwen3_5 model was trained 2x faster with Unsloth and Huggingface's TRL library.
Base model
Qwen/Qwen3.5-9B-Base