Update README.md
Browse files
README.md
CHANGED
@@ -12,7 +12,7 @@ import io
|
|
12 |
model = torch.nn.Sequential(torch.nn.Linear(32, 256, dtype=torch.bfloat16, device="cuda"))
|
13 |
|
14 |
from torchao.quantization import Int4WeightOnlyConfig, quantize_
|
15 |
-
quant_config = Int4WeightOnlyConfig(group_size=128, packing_format="plain", version=2)
|
16 |
quantize_(model, quant_config)
|
17 |
example_inputs = (torch.randn(2, 32, dtype=torch.bfloat16, device="cuda"),)
|
18 |
output = model(*example_inputs)
|
|
|
12 |
model = torch.nn.Sequential(torch.nn.Linear(32, 256, dtype=torch.bfloat16, device="cuda"))
|
13 |
|
14 |
from torchao.quantization import Int4WeightOnlyConfig, quantize_
|
15 |
+
quant_config = Int4WeightOnlyConfig(group_size=128, int4_packing_format="plain", version=2)
|
16 |
quantize_(model, quant_config)
|
17 |
example_inputs = (torch.randn(2, 32, dtype=torch.bfloat16, device="cuda"),)
|
18 |
output = model(*example_inputs)
|