torchao-testing
/

single-linear-Int4WeightOnlyConfig-v2-0.13.dev

Model card Files Files and versions

jerryzh168 committed 16 days ago

Commit

acbacde

·

verified ·

1 Parent(s): e54d183

Update README.md

Files changed (1) hide show

README.md +1 -1

README.md CHANGED Viewed

@@ -12,7 +12,7 @@ import io
 model = torch.nn.Sequential(torch.nn.Linear(32, 256, dtype=torch.bfloat16, device="cuda"))
 from torchao.quantization import Int4WeightOnlyConfig, quantize_
-quant_config = Int4WeightOnlyConfig(group_size=128, packing_format="plain", version=2)
 quantize_(model, quant_config)
 example_inputs = (torch.randn(2, 32, dtype=torch.bfloat16, device="cuda"),)
 output = model(*example_inputs)

 model = torch.nn.Sequential(torch.nn.Linear(32, 256, dtype=torch.bfloat16, device="cuda"))
 from torchao.quantization import Int4WeightOnlyConfig, quantize_
+quant_config = Int4WeightOnlyConfig(group_size=128, int4_packing_format="plain", version=2)
 quantize_(model, quant_config)
 example_inputs = (torch.randn(2, 32, dtype=torch.bfloat16, device="cuda"),)
 output = model(*example_inputs)