Spaces:
Sleeping
Sleeping
Commit
·
d02a9d8
1
Parent(s):
b36a0b0
Update Qwen model to use AWQ quantized version
Browse files
- test_quantization_notebook.py +38 -18
test_quantization_notebook.py
CHANGED
|
@@ -65,7 +65,11 @@ def test_imports():
|
|
| 65 |
|
| 66 |
# Test compressed_tensors imports (usually comes with llmcompressor)
|
| 67 |
try:
|
| 68 |
-
from compressed_tensors.quantization import
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
print("✅ compressed_tensors.quantization")
|
| 70 |
except ImportError:
|
| 71 |
print("⚠️ compressed_tensors (not installed locally - will install in Colab)")
|
|
@@ -93,24 +97,36 @@ def test_awq_modifier_creation():
|
|
| 93 |
|
| 94 |
try:
|
| 95 |
from llmcompressor.modifiers.awq import AWQModifier
|
| 96 |
-
from compressed_tensors.quantization import
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
|
| 98 |
-
# Create quantization
|
| 99 |
-
print(" → Creating
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
)
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
# Create AWQModifier
|
| 112 |
print(" → Creating AWQModifier...")
|
| 113 |
-
modifier = AWQModifier(
|
| 114 |
print(" ✅ AWQModifier created successfully")
|
| 115 |
|
| 116 |
return True
|
|
@@ -193,10 +209,15 @@ def test_configuration():
|
|
| 193 |
}
|
| 194 |
|
| 195 |
AWQ_CONFIG = {
|
| 196 |
-
"
|
| 197 |
-
"
|
| 198 |
"zero_point": True,
|
| 199 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
}
|
| 201 |
|
| 202 |
print(f" ✅ Configuration structure valid")
|
|
@@ -254,4 +275,3 @@ def main():
|
|
| 254 |
|
| 255 |
if __name__ == "__main__":
|
| 256 |
sys.exit(main())
|
| 257 |
-
|
|
|
|
| 65 |
|
| 66 |
# Test compressed_tensors imports (usually comes with llmcompressor)
|
| 67 |
try:
|
| 68 |
+
from compressed_tensors.quantization import QuantizationScheme, QuantizationArgs
|
| 69 |
+
from compressed_tensors.quantization.quant_args import (
|
| 70 |
+
QuantizationStrategy,
|
| 71 |
+
QuantizationType,
|
| 72 |
+
)
|
| 73 |
print("✅ compressed_tensors.quantization")
|
| 74 |
except ImportError:
|
| 75 |
print("⚠️ compressed_tensors (not installed locally - will install in Colab)")
|
|
|
|
| 97 |
|
| 98 |
try:
|
| 99 |
from llmcompressor.modifiers.awq import AWQModifier
|
| 100 |
+
from compressed_tensors.quantization import QuantizationScheme, QuantizationArgs
|
| 101 |
+
from compressed_tensors.quantization.quant_args import (
|
| 102 |
+
QuantizationStrategy,
|
| 103 |
+
QuantizationType,
|
| 104 |
+
)
|
| 105 |
|
| 106 |
+
# Create quantization scheme (mirrors notebook helper)
|
| 107 |
+
print(" → Creating QuantizationScheme...")
|
| 108 |
+
weights = QuantizationArgs(
|
| 109 |
+
num_bits=4,
|
| 110 |
+
group_size=128,
|
| 111 |
+
symmetric=False,
|
| 112 |
+
strategy=QuantizationStrategy.GROUP,
|
| 113 |
+
type=QuantizationType.INT,
|
| 114 |
+
observer="minmax",
|
| 115 |
+
dynamic=False,
|
| 116 |
)
|
| 117 |
+
scheme = QuantizationScheme(
|
| 118 |
+
targets=["Linear"],
|
| 119 |
+
weights=weights,
|
| 120 |
+
input_activations=None,
|
| 121 |
+
output_activations=None,
|
| 122 |
+
format="pack-quantized",
|
| 123 |
+
)
|
| 124 |
+
config_groups = {"group_0": scheme}
|
| 125 |
+
print(" ✅ QuantizationScheme created")
|
| 126 |
|
| 127 |
# Create AWQModifier
|
| 128 |
print(" → Creating AWQModifier...")
|
| 129 |
+
modifier = AWQModifier(config_groups=config_groups, ignore=["lm_head"])
|
| 130 |
print(" ✅ AWQModifier created successfully")
|
| 131 |
|
| 132 |
return True
|
|
|
|
| 209 |
}
|
| 210 |
|
| 211 |
AWQ_CONFIG = {
|
| 212 |
+
"num_bits": 4,
|
| 213 |
+
"group_size": 128,
|
| 214 |
"zero_point": True,
|
| 215 |
+
"strategy": "group",
|
| 216 |
+
"targets": ["Linear"],
|
| 217 |
+
"ignore": ["lm_head"],
|
| 218 |
+
"format": "pack-quantized",
|
| 219 |
+
"observer": "minmax",
|
| 220 |
+
"dynamic": False,
|
| 221 |
}
|
| 222 |
|
| 223 |
print(f" ✅ Configuration structure valid")
|
|
|
|
| 275 |
|
| 276 |
if __name__ == "__main__":
|
| 277 |
sys.exit(main())
|
|
|