```
model: single_linear
config: Int4WeightOnlyConfig
config version: 2
torchao version: 0.13.dev
```

```
import torch
import io

# Create a single bfloat16 linear layer and quantize it with int4 weight-only quantization.
model = torch.nn.Sequential(torch.nn.Linear(32, 256, dtype=torch.bfloat16, device="cuda"))

from torchao.quantization import Int4WeightOnlyConfig, quantize_

quant_config = Int4WeightOnlyConfig(group_size=128, int4_packing_format="plain", version=2)
quantize_(model, quant_config)

# Run the quantized model once to record a reference output.
example_inputs = (torch.randn(2, 32, dtype=torch.bfloat16, device="cuda"),)
output = model(*example_inputs)

# Push the quantized state dict, example inputs, and reference output to the Hub.
USER_ID = "torchao-testing"
MODEL_NAME = "single-linear"
save_to = f"{USER_ID}/{MODEL_NAME}-Int4WeightOnlyConfig-v2-0.13.dev"

from huggingface_hub import HfApi

api = HfApi()
api.create_repo(save_to, repo_type="model", exist_ok=True)

buf = io.BytesIO()
torch.save(model.state_dict(), buf)
buf.seek(0)  # rewind so the full serialized bytes are uploaded
api.upload_file(
    path_or_fileobj=buf,
    path_in_repo="model.pt",
    repo_id=save_to,
)

buf = io.BytesIO()
torch.save(example_inputs, buf)
buf.seek(0)
api.upload_file(
    path_or_fileobj=buf,
    path_in_repo="model_inputs.pt",
    repo_id=save_to,
)

buf = io.BytesIO()
torch.save(output, buf)
buf.seek(0)
api.upload_file(
    path_or_fileobj=buf,
    path_in_repo="model_output.pt",
    repo_id=save_to,
)
```
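
The script above ends at the upload step. As a sanity check, the sketch below shows one way to pull the three artifacts back down and confirm that a reloaded model reproduces the saved output. It is not part of the original card: the use of `hf_hub_download`, `torch.load(..., weights_only=False)`, and `load_state_dict(..., assign=True)` are assumptions about how the checkpoint is meant to be consumed.

```
# Hypothetical verification sketch (not from the original card).
import torch
from huggingface_hub import hf_hub_download

repo_id = "torchao-testing/single-linear-Int4WeightOnlyConfig-v2-0.13.dev"

model_path = hf_hub_download(repo_id, "model.pt")
inputs_path = hf_hub_download(repo_id, "model_inputs.pt")
output_path = hf_hub_download(repo_id, "model_output.pt")

# The state dict contains torchao tensor subclasses; weights_only=False
# sidesteps the safe-unpickling allow-list and assumes you trust this checkpoint.
state_dict = torch.load(model_path, weights_only=False)
example_inputs = torch.load(inputs_path, weights_only=False)
expected_output = torch.load(output_path, weights_only=False)

# Rebuild the same single-linear module and load the quantized weights.
# assign=True swaps the parameters in rather than copying, since the
# quantized weights are tensor subclasses, not plain tensors.
model = torch.nn.Sequential(torch.nn.Linear(32, 256, dtype=torch.bfloat16, device="cuda"))
model.load_state_dict(state_dict, assign=True)

output = model(*example_inputs)
print(torch.equal(output, expected_output))
```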