diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..0cd58331b2a989b68be4ec5676383437fca8687b
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,36 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+*.so filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..2e1a774ac3eaeca2a0f28bab763e27ec6811bfd0
--- /dev/null
+++ b/README.md
@@ -0,0 +1,14 @@
+---
+license: apache-2.0
+tags:
+  - kernels
+---
+
+![Status](https://hubwebhook.dholtz.com/shield?repo=kernels-community/quantization-eetq)
+
+## eetq
+
+EETQ kernels from [NetEase-FuXi/EETQ](https://github.com/NetEase-FuXi/EETQ).
+
+Kernel source: https://github.com/huggingface/kernels-community/tree/main/quantization-eetq
+
diff --git a/build/torch210-cxx11-cu126-aarch64-linux/__init__.py b/build/torch210-cxx11-cu126-aarch64-linux/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c65d0601c655d7acf1a12e61b6549618b46a70d7
--- /dev/null
+++ b/build/torch210-cxx11-cu126-aarch64-linux/__init__.py
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
diff --git a/build/torch210-cxx11-cu126-aarch64-linux/_ops.py b/build/torch210-cxx11-cu126-aarch64-linux/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..60ecc447c9f7708830d1cc06986da9d62a31bd96
--- /dev/null
+++ b/build/torch210-cxx11-cu126-aarch64-linux/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_cuda_4bc0051
+ops = torch.ops._quantization_eetq_cuda_4bc0051
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Return `op_name` qualified as `_quantization_eetq_cuda_4bc0051::<op_name>`.
+    """
+    return f"_quantization_eetq_cuda_4bc0051::{op_name}"
diff --git a/build/torch210-cxx11-cu126-aarch64-linux/_quantization_eetq_cuda_4bc0051.abi3.so b/build/torch210-cxx11-cu126-aarch64-linux/_quantization_eetq_cuda_4bc0051.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..4efcd5beaa94c0c2a3ddf41e7bcb070e78d7acbb
--- /dev/null
+++ b/build/torch210-cxx11-cu126-aarch64-linux/_quantization_eetq_cuda_4bc0051.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d7c4c1ddd8d31cee535f23012aa490749a38b28dc3592473eea939e3611b3b1
+size 39010048
diff --git a/build/torch210-cxx11-cu126-aarch64-linux/custom_ops.py b/build/torch210-cxx11-cu126-aarch64-linux/custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..005b5a6e3cd5f7bcfd4aa5d7d80d60a5ed9fab88
--- /dev/null
+++ b/build/torch210-cxx11-cu126-aarch64-linux/custom_ops.py
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor) -> torch.Tensor:
+    """Call `ops.w8_a16_gemm` with the three arguments unchanged and return its result."""
+    gemm_op = ops.w8_a16_gemm
+    return gemm_op(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor,
+    output: torch.Tensor, m: int, n: int, k: int,
+) -> torch.Tensor:
+    """Call `ops.w8_a16_gemm_` and return its result.
+
+    All seven arguments are forwarded positionally, in the order declared here.
+    """
+    forwarded = (input, weight, scale, output, m, n, k)
+    return ops.w8_a16_gemm_(*forwarded)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:  # thin wrapper
+    return ops.preprocess_weights(origin_weight, is_int4)  # forwards both arguments, unchanged, to the op of the same name on `ops`
+
+
+def quant_weights(
+    origin_weight: torch.Tensor, quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    """Call `ops.quant_weights` with the arguments unchanged and return its result."""
+    keep_unprocessed = return_unprocessed_quantized_tensor
+    tensors = ops.quant_weights(origin_weight, quant_type, keep_unprocessed)
+    return tensors
diff --git a/build/torch210-cxx11-cu126-aarch64-linux/metadata.json b/build/torch210-cxx11-cu126-aarch64-linux/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..f5902b55ab0b2b561c0cf97567c9806c60839c7f
--- /dev/null
+++ b/build/torch210-cxx11-cu126-aarch64-linux/metadata.json
@@ -0,0 +1,18 @@
+{
+  "version": 1,
+  "license": "Apache-2.0",
+  "python-depends": [],
+  "backend": {
+    "type": "cuda",
+    "archs": [
+      "7.0",
+      "7.2",
+      "7.5",
+      "8.0",
+      "8.6",
+      "8.7",
+      "8.9",
+      "9.0+PTX"
+    ]
+  }
+}
diff --git a/build/torch210-cxx11-cu126-aarch64-linux/quantization_eetq/__init__.py b/build/torch210-cxx11-cu126-aarch64-linux/quantization_eetq/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23
--- /dev/null
+++ b/build/torch210-cxx11-cu126-aarch64-linux/quantization_eetq/__init__.py
@@ -0,0 +1,26 @@
+import ctypes
+import importlib.util
+import sys
+from pathlib import Path
+from types import ModuleType
+
+
+def _import_from_path(file_path: Path) -> ModuleType:
+    """Execute `file_path` as a module registered under a path-derived name and return it."""
+    # The name is the hex-encoded hash of the absolute path: once a name is in
+    # `sys.modules` it is also used for other imports, so it must be unique.
+    module_name = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if spec is None or spec.loader is None:  # no loader -> ImportError, not AttributeError
+        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
+    module = importlib.util.module_from_spec(spec)
+    sys.modules[module_name] = module
+    try:
+        spec.loader.exec_module(module)
+    except BaseException:  # never leave a half-initialised module registered
+        sys.modules.pop(module_name, None)
+        raise
+    return module
+
+
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))  # copy every module-level name of the parent directory's __init__.py into this module
diff --git a/build/torch210-cxx11-cu126-x86_64-linux/__init__.py b/build/torch210-cxx11-cu126-x86_64-linux/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c65d0601c655d7acf1a12e61b6549618b46a70d7
--- /dev/null
+++ b/build/torch210-cxx11-cu126-x86_64-linux/__init__.py
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
diff --git a/build/torch210-cxx11-cu126-x86_64-linux/_ops.py b/build/torch210-cxx11-cu126-x86_64-linux/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..60ecc447c9f7708830d1cc06986da9d62a31bd96
--- /dev/null
+++ b/build/torch210-cxx11-cu126-x86_64-linux/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_cuda_4bc0051
+ops = torch.ops._quantization_eetq_cuda_4bc0051
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Return `op_name` qualified as `_quantization_eetq_cuda_4bc0051::<op_name>`.
+    """
+    return f"_quantization_eetq_cuda_4bc0051::{op_name}"
diff --git a/build/torch210-cxx11-cu126-x86_64-linux/_quantization_eetq_cuda_4bc0051.abi3.so b/build/torch210-cxx11-cu126-x86_64-linux/_quantization_eetq_cuda_4bc0051.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..927178805a814378e80b246d7b8cdacafa168135
--- /dev/null
+++ b/build/torch210-cxx11-cu126-x86_64-linux/_quantization_eetq_cuda_4bc0051.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d60d2faac38ccbfeca46a6f9b025cdce2590bb5291da8ae42f0e2235a29ec066
+size 39060416
diff --git a/build/torch210-cxx11-cu126-x86_64-linux/custom_ops.py b/build/torch210-cxx11-cu126-x86_64-linux/custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..005b5a6e3cd5f7bcfd4aa5d7d80d60a5ed9fab88
--- /dev/null
+++ b/build/torch210-cxx11-cu126-x86_64-linux/custom_ops.py
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor) -> torch.Tensor:
+    """Call `ops.w8_a16_gemm` with the three arguments unchanged and return its result."""
+    gemm_op = ops.w8_a16_gemm
+    return gemm_op(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor,
+    output: torch.Tensor, m: int, n: int, k: int,
+) -> torch.Tensor:
+    """Call `ops.w8_a16_gemm_` and return its result.
+
+    All seven arguments are forwarded positionally, in the order declared here.
+    """
+    forwarded = (input, weight, scale, output, m, n, k)
+    return ops.w8_a16_gemm_(*forwarded)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:  # thin wrapper
+    return ops.preprocess_weights(origin_weight, is_int4)  # forwards both arguments, unchanged, to the op of the same name on `ops`
+
+
+def quant_weights(
+    origin_weight: torch.Tensor, quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    """Call `ops.quant_weights` with the arguments unchanged and return its result."""
+    keep_unprocessed = return_unprocessed_quantized_tensor
+    tensors = ops.quant_weights(origin_weight, quant_type, keep_unprocessed)
+    return tensors
diff --git a/build/torch210-cxx11-cu126-x86_64-linux/metadata.json b/build/torch210-cxx11-cu126-x86_64-linux/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..f5902b55ab0b2b561c0cf97567c9806c60839c7f
--- /dev/null
+++ b/build/torch210-cxx11-cu126-x86_64-linux/metadata.json
@@ -0,0 +1,18 @@
+{
+  "version": 1,
+  "license": "Apache-2.0",
+  "python-depends": [],
+  "backend": {
+    "type": "cuda",
+    "archs": [
+      "7.0",
+      "7.2",
+      "7.5",
+      "8.0",
+      "8.6",
+      "8.7",
+      "8.9",
+      "9.0+PTX"
+    ]
+  }
+}
diff --git a/build/torch210-cxx11-cu126-x86_64-linux/quantization_eetq/__init__.py b/build/torch210-cxx11-cu126-x86_64-linux/quantization_eetq/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23
--- /dev/null
+++ b/build/torch210-cxx11-cu126-x86_64-linux/quantization_eetq/__init__.py
@@ -0,0 +1,26 @@
+import ctypes
+import importlib.util
+import sys
+from pathlib import Path
+from types import ModuleType
+
+
+def _import_from_path(file_path: Path) -> ModuleType:
+    """Execute `file_path` as a module registered under a path-derived name and return it."""
+    # The name is the hex-encoded hash of the absolute path: once a name is in
+    # `sys.modules` it is also used for other imports, so it must be unique.
+    module_name = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if spec is None or spec.loader is None:  # no loader -> ImportError, not AttributeError
+        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
+    module = importlib.util.module_from_spec(spec)
+    sys.modules[module_name] = module
+    try:
+        spec.loader.exec_module(module)
+    except BaseException:  # never leave a half-initialised module registered
+        sys.modules.pop(module_name, None)
+        raise
+    return module
+
+
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))  # copy every module-level name of the parent directory's __init__.py into this module
diff --git a/build/torch210-cxx11-cu128-aarch64-linux/__init__.py b/build/torch210-cxx11-cu128-aarch64-linux/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c65d0601c655d7acf1a12e61b6549618b46a70d7
--- /dev/null
+++ b/build/torch210-cxx11-cu128-aarch64-linux/__init__.py
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
diff --git a/build/torch210-cxx11-cu128-aarch64-linux/_ops.py b/build/torch210-cxx11-cu128-aarch64-linux/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..60ecc447c9f7708830d1cc06986da9d62a31bd96
--- /dev/null
+++ b/build/torch210-cxx11-cu128-aarch64-linux/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_cuda_4bc0051
+ops = torch.ops._quantization_eetq_cuda_4bc0051
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Return `op_name` qualified as `_quantization_eetq_cuda_4bc0051::<op_name>`.
+    """
+    return f"_quantization_eetq_cuda_4bc0051::{op_name}"
diff --git a/build/torch210-cxx11-cu128-aarch64-linux/_quantization_eetq_cuda_4bc0051.abi3.so b/build/torch210-cxx11-cu128-aarch64-linux/_quantization_eetq_cuda_4bc0051.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..1af2aabd02cf61d7454b95908d253d0bc439d8fe
--- /dev/null
+++ b/build/torch210-cxx11-cu128-aarch64-linux/_quantization_eetq_cuda_4bc0051.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0aa8d32b1d6f07f1b095c7228dbe14325aeeba864aefcdc614654261babce162
+size 45366048
diff --git a/build/torch210-cxx11-cu128-aarch64-linux/custom_ops.py b/build/torch210-cxx11-cu128-aarch64-linux/custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..005b5a6e3cd5f7bcfd4aa5d7d80d60a5ed9fab88
--- /dev/null
+++ b/build/torch210-cxx11-cu128-aarch64-linux/custom_ops.py
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor) -> torch.Tensor:
+    """Call `ops.w8_a16_gemm` with the three arguments unchanged and return its result."""
+    gemm_op = ops.w8_a16_gemm
+    return gemm_op(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor,
+    output: torch.Tensor, m: int, n: int, k: int,
+) -> torch.Tensor:
+    """Call `ops.w8_a16_gemm_` and return its result.
+
+    All seven arguments are forwarded positionally, in the order declared here.
+    """
+    forwarded = (input, weight, scale, output, m, n, k)
+    return ops.w8_a16_gemm_(*forwarded)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:  # thin wrapper
+    return ops.preprocess_weights(origin_weight, is_int4)  # forwards both arguments, unchanged, to the op of the same name on `ops`
+
+
+def quant_weights(
+    origin_weight: torch.Tensor, quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    """Call `ops.quant_weights` with the arguments unchanged and return its result."""
+    keep_unprocessed = return_unprocessed_quantized_tensor
+    tensors = ops.quant_weights(origin_weight, quant_type, keep_unprocessed)
+    return tensors
diff --git a/build/torch210-cxx11-cu128-aarch64-linux/metadata.json b/build/torch210-cxx11-cu128-aarch64-linux/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..8b796af185fbbd8594fcd846949aa5fadc0ccdda
--- /dev/null
+++ b/build/torch210-cxx11-cu128-aarch64-linux/metadata.json
@@ -0,0 +1,21 @@
+{
+  "version": 1,
+  "license": "Apache-2.0",
+  "python-depends": [],
+  "backend": {
+    "type": "cuda",
+    "archs": [
+      "10.0",
+      "10.1",
+      "12.0+PTX",
+      "7.0",
+      "7.2",
+      "7.5",
+      "8.0",
+      "8.6",
+      "8.7",
+      "8.9",
+      "9.0"
+    ]
+  }
+}
diff --git a/build/torch210-cxx11-cu128-aarch64-linux/quantization_eetq/__init__.py b/build/torch210-cxx11-cu128-aarch64-linux/quantization_eetq/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23
--- /dev/null
+++ b/build/torch210-cxx11-cu128-aarch64-linux/quantization_eetq/__init__.py
@@ -0,0 +1,26 @@
+import ctypes
+import importlib.util
+import sys
+from pathlib import Path
+from types import ModuleType
+
+
+def _import_from_path(file_path: Path) -> ModuleType:
+    """Execute `file_path` as a module registered under a path-derived name and return it."""
+    # The name is the hex-encoded hash of the absolute path: once a name is in
+    # `sys.modules` it is also used for other imports, so it must be unique.
+    module_name = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if spec is None or spec.loader is None:  # no loader -> ImportError, not AttributeError
+        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
+    module = importlib.util.module_from_spec(spec)
+    sys.modules[module_name] = module
+    try:
+        spec.loader.exec_module(module)
+    except BaseException:  # never leave a half-initialised module registered
+        sys.modules.pop(module_name, None)
+        raise
+    return module
+
+
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))  # copy every module-level name of the parent directory's __init__.py into this module
diff --git a/build/torch210-cxx11-cu128-x86_64-linux/__init__.py b/build/torch210-cxx11-cu128-x86_64-linux/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c65d0601c655d7acf1a12e61b6549618b46a70d7
--- /dev/null
+++ b/build/torch210-cxx11-cu128-x86_64-linux/__init__.py
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
diff --git a/build/torch210-cxx11-cu128-x86_64-linux/_ops.py b/build/torch210-cxx11-cu128-x86_64-linux/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..60ecc447c9f7708830d1cc06986da9d62a31bd96
--- /dev/null
+++ b/build/torch210-cxx11-cu128-x86_64-linux/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_cuda_4bc0051
+ops = torch.ops._quantization_eetq_cuda_4bc0051
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Return `op_name` qualified as `_quantization_eetq_cuda_4bc0051::<op_name>`.
+    """
+    return f"_quantization_eetq_cuda_4bc0051::{op_name}"
diff --git a/build/torch210-cxx11-cu128-x86_64-linux/_quantization_eetq_cuda_4bc0051.abi3.so b/build/torch210-cxx11-cu128-x86_64-linux/_quantization_eetq_cuda_4bc0051.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..cf63326135f23f3dbcaae1990872eebd9ee082f8
--- /dev/null
+++ b/build/torch210-cxx11-cu128-x86_64-linux/_quantization_eetq_cuda_4bc0051.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4735c25762548fcf265184a9c06da1bddc95f663f183b15e53b09f98c35ad321
+size 45394000
diff --git a/build/torch210-cxx11-cu128-x86_64-linux/custom_ops.py b/build/torch210-cxx11-cu128-x86_64-linux/custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..005b5a6e3cd5f7bcfd4aa5d7d80d60a5ed9fab88
--- /dev/null
+++ b/build/torch210-cxx11-cu128-x86_64-linux/custom_ops.py
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor) -> torch.Tensor:
+    """Call `ops.w8_a16_gemm` with the three arguments unchanged and return its result."""
+    gemm_op = ops.w8_a16_gemm
+    return gemm_op(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor,
+    output: torch.Tensor, m: int, n: int, k: int,
+) -> torch.Tensor:
+    """Call `ops.w8_a16_gemm_` and return its result.
+
+    All seven arguments are forwarded positionally, in the order declared here.
+    """
+    forwarded = (input, weight, scale, output, m, n, k)
+    return ops.w8_a16_gemm_(*forwarded)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:  # thin wrapper
+    return ops.preprocess_weights(origin_weight, is_int4)  # forwards both arguments, unchanged, to the op of the same name on `ops`
+
+
+def quant_weights(
+    origin_weight: torch.Tensor, quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    """Call `ops.quant_weights` with the arguments unchanged and return its result."""
+    keep_unprocessed = return_unprocessed_quantized_tensor
+    tensors = ops.quant_weights(origin_weight, quant_type, keep_unprocessed)
+    return tensors
diff --git a/build/torch210-cxx11-cu128-x86_64-linux/metadata.json b/build/torch210-cxx11-cu128-x86_64-linux/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..8b796af185fbbd8594fcd846949aa5fadc0ccdda
--- /dev/null
+++ b/build/torch210-cxx11-cu128-x86_64-linux/metadata.json
@@ -0,0 +1,21 @@
+{
+  "version": 1,
+  "license": "Apache-2.0",
+  "python-depends": [],
+  "backend": {
+    "type": "cuda",
+    "archs": [
+      "10.0",
+      "10.1",
+      "12.0+PTX",
+      "7.0",
+      "7.2",
+      "7.5",
+      "8.0",
+      "8.6",
+      "8.7",
+      "8.9",
+      "9.0"
+    ]
+  }
+}
diff --git a/build/torch210-cxx11-cu128-x86_64-linux/quantization_eetq/__init__.py b/build/torch210-cxx11-cu128-x86_64-linux/quantization_eetq/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23
--- /dev/null
+++ b/build/torch210-cxx11-cu128-x86_64-linux/quantization_eetq/__init__.py
@@ -0,0 +1,26 @@
+import ctypes
+import importlib.util
+import sys
+from pathlib import Path
+from types import ModuleType
+
+
+def _import_from_path(file_path: Path) -> ModuleType:
+    """Execute `file_path` as a module registered under a path-derived name and return it."""
+    # The name is the hex-encoded hash of the absolute path: once a name is in
+    # `sys.modules` it is also used for other imports, so it must be unique.
+    module_name = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if spec is None or spec.loader is None:  # no loader -> ImportError, not AttributeError
+        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
+    module = importlib.util.module_from_spec(spec)
+    sys.modules[module_name] = module
+    try:
+        spec.loader.exec_module(module)
+    except BaseException:  # never leave a half-initialised module registered
+        sys.modules.pop(module_name, None)
+        raise
+    return module
+
+
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))  # copy every module-level name of the parent directory's __init__.py into this module
diff --git a/build/torch211-cxx11-cu126-aarch64-linux/__init__.py b/build/torch211-cxx11-cu126-aarch64-linux/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c65d0601c655d7acf1a12e61b6549618b46a70d7
--- /dev/null
+++ b/build/torch211-cxx11-cu126-aarch64-linux/__init__.py
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
diff --git a/build/torch211-cxx11-cu126-aarch64-linux/_ops.py b/build/torch211-cxx11-cu126-aarch64-linux/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..60ecc447c9f7708830d1cc06986da9d62a31bd96
--- /dev/null
+++ b/build/torch211-cxx11-cu126-aarch64-linux/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_cuda_4bc0051
+ops = torch.ops._quantization_eetq_cuda_4bc0051
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Return `op_name` qualified as `_quantization_eetq_cuda_4bc0051::<op_name>`.
+    """
+    return f"_quantization_eetq_cuda_4bc0051::{op_name}"
diff --git a/build/torch211-cxx11-cu126-aarch64-linux/_quantization_eetq_cuda_4bc0051.abi3.so b/build/torch211-cxx11-cu126-aarch64-linux/_quantization_eetq_cuda_4bc0051.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..01f035b3c7232e2479312ba5463bba1deb4d6941
--- /dev/null
+++ b/build/torch211-cxx11-cu126-aarch64-linux/_quantization_eetq_cuda_4bc0051.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b1d6d36599980e86a95f70c00d05c9aacef4a4d24188a84c3b258a2a06133fd
+size 39006256
diff --git a/build/torch211-cxx11-cu126-aarch64-linux/custom_ops.py b/build/torch211-cxx11-cu126-aarch64-linux/custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..005b5a6e3cd5f7bcfd4aa5d7d80d60a5ed9fab88
--- /dev/null
+++ b/build/torch211-cxx11-cu126-aarch64-linux/custom_ops.py
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor) -> torch.Tensor:
+    """Call `ops.w8_a16_gemm` with the three arguments unchanged and return its result."""
+    gemm_op = ops.w8_a16_gemm
+    return gemm_op(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor,
+    output: torch.Tensor, m: int, n: int, k: int,
+) -> torch.Tensor:
+    """Call `ops.w8_a16_gemm_` and return its result.
+
+    All seven arguments are forwarded positionally, in the order declared here.
+    """
+    forwarded = (input, weight, scale, output, m, n, k)
+    return ops.w8_a16_gemm_(*forwarded)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:  # thin wrapper
+    return ops.preprocess_weights(origin_weight, is_int4)  # forwards both arguments, unchanged, to the op of the same name on `ops`
+
+
+def quant_weights(
+    origin_weight: torch.Tensor, quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    """Call `ops.quant_weights` with the arguments unchanged and return its result."""
+    keep_unprocessed = return_unprocessed_quantized_tensor
+    tensors = ops.quant_weights(origin_weight, quant_type, keep_unprocessed)
+    return tensors
diff --git a/build/torch211-cxx11-cu126-aarch64-linux/metadata.json b/build/torch211-cxx11-cu126-aarch64-linux/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..f5902b55ab0b2b561c0cf97567c9806c60839c7f
--- /dev/null
+++ b/build/torch211-cxx11-cu126-aarch64-linux/metadata.json
@@ -0,0 +1,18 @@
+{
+  "version": 1,
+  "license": "Apache-2.0",
+  "python-depends": [],
+  "backend": {
+    "type": "cuda",
+    "archs": [
+      "7.0",
+      "7.2",
+      "7.5",
+      "8.0",
+      "8.6",
+      "8.7",
+      "8.9",
+      "9.0+PTX"
+    ]
+  }
+}
diff --git a/build/torch211-cxx11-cu126-aarch64-linux/quantization_eetq/__init__.py b/build/torch211-cxx11-cu126-aarch64-linux/quantization_eetq/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23
--- /dev/null
+++ b/build/torch211-cxx11-cu126-aarch64-linux/quantization_eetq/__init__.py
@@ -0,0 +1,26 @@
+import ctypes
+import importlib.util
+import sys
+from pathlib import Path
+from types import ModuleType
+
+
+def _import_from_path(file_path: Path) -> ModuleType:
+    """Execute `file_path` as a module registered under a path-derived name and return it."""
+    # The name is the hex-encoded hash of the absolute path: once a name is in
+    # `sys.modules` it is also used for other imports, so it must be unique.
+    module_name = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if spec is None or spec.loader is None:  # no loader -> ImportError, not AttributeError
+        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
+    module = importlib.util.module_from_spec(spec)
+    sys.modules[module_name] = module
+    try:
+        spec.loader.exec_module(module)
+    except BaseException:  # never leave a half-initialised module registered
+        sys.modules.pop(module_name, None)
+        raise
+    return module
+
+
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))  # copy every module-level name of the parent directory's __init__.py into this module
diff --git a/build/torch211-cxx11-cu126-x86_64-linux/__init__.py b/build/torch211-cxx11-cu126-x86_64-linux/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c65d0601c655d7acf1a12e61b6549618b46a70d7
--- /dev/null
+++ b/build/torch211-cxx11-cu126-x86_64-linux/__init__.py
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
diff --git a/build/torch211-cxx11-cu126-x86_64-linux/_ops.py b/build/torch211-cxx11-cu126-x86_64-linux/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..60ecc447c9f7708830d1cc06986da9d62a31bd96
--- /dev/null
+++ b/build/torch211-cxx11-cu126-x86_64-linux/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_cuda_4bc0051
+ops = torch.ops._quantization_eetq_cuda_4bc0051
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Return `op_name` qualified as `_quantization_eetq_cuda_4bc0051::<op_name>`.
+    """
+    return f"_quantization_eetq_cuda_4bc0051::{op_name}"
diff --git a/build/torch211-cxx11-cu126-x86_64-linux/_quantization_eetq_cuda_4bc0051.abi3.so b/build/torch211-cxx11-cu126-x86_64-linux/_quantization_eetq_cuda_4bc0051.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..b83be2e6f92eb37e9f8ec77cee09197f3860502c
--- /dev/null
+++ b/build/torch211-cxx11-cu126-x86_64-linux/_quantization_eetq_cuda_4bc0051.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1264b126ed4d9c9d80758671f9965e830bfa916631934b81784e194c74b293a0
+size 39053336
diff --git a/build/torch211-cxx11-cu126-x86_64-linux/custom_ops.py b/build/torch211-cxx11-cu126-x86_64-linux/custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..005b5a6e3cd5f7bcfd4aa5d7d80d60a5ed9fab88
--- /dev/null
+++ b/build/torch211-cxx11-cu126-x86_64-linux/custom_ops.py
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor) -> torch.Tensor:
+    """Call `ops.w8_a16_gemm` with the three arguments unchanged and return its result."""
+    gemm_op = ops.w8_a16_gemm
+    return gemm_op(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor,
+    output: torch.Tensor, m: int, n: int, k: int,
+) -> torch.Tensor:
+    """Call `ops.w8_a16_gemm_` and return its result.
+
+    All seven arguments are forwarded positionally, in the order declared here.
+    """
+    forwarded = (input, weight, scale, output, m, n, k)
+    return ops.w8_a16_gemm_(*forwarded)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:  # thin wrapper
+    return ops.preprocess_weights(origin_weight, is_int4)  # forwards both arguments, unchanged, to the op of the same name on `ops`
+
+
+def quant_weights(
+    origin_weight: torch.Tensor, quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    """Call `ops.quant_weights` with the arguments unchanged and return its result."""
+    keep_unprocessed = return_unprocessed_quantized_tensor
+    tensors = ops.quant_weights(origin_weight, quant_type, keep_unprocessed)
+    return tensors
diff --git a/build/torch211-cxx11-cu126-x86_64-linux/metadata.json b/build/torch211-cxx11-cu126-x86_64-linux/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..f5902b55ab0b2b561c0cf97567c9806c60839c7f
--- /dev/null
+++ b/build/torch211-cxx11-cu126-x86_64-linux/metadata.json
@@ -0,0 +1,18 @@
+{
+  "version": 1,
+  "license": "Apache-2.0",
+  "python-depends": [],
+  "backend": {
+    "type": "cuda",
+    "archs": [
+      "7.0",
+      "7.2",
+      "7.5",
+      "8.0",
+      "8.6",
+      "8.7",
+      "8.9",
+      "9.0+PTX"
+    ]
+  }
+}
diff --git a/build/torch211-cxx11-cu126-x86_64-linux/quantization_eetq/__init__.py b/build/torch211-cxx11-cu126-x86_64-linux/quantization_eetq/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23
--- /dev/null
+++ b/build/torch211-cxx11-cu126-x86_64-linux/quantization_eetq/__init__.py
@@ -0,0 +1,26 @@
+import ctypes
+import importlib.util
+import sys
+from pathlib import Path
+from types import ModuleType
+
+
+def _import_from_path(file_path: Path) -> ModuleType:
+    """Execute `file_path` as a module registered under a path-derived name and return it."""
+    # The name is the hex-encoded hash of the absolute path: once a name is in
+    # `sys.modules` it is also used for other imports, so it must be unique.
+    module_name = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if spec is None or spec.loader is None:  # no loader -> ImportError, not AttributeError
+        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
+    module = importlib.util.module_from_spec(spec)
+    sys.modules[module_name] = module
+    try:
+        spec.loader.exec_module(module)
+    except BaseException:  # never leave a half-initialised module registered
+        sys.modules.pop(module_name, None)
+        raise
+    return module
+
+
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))  # copy every module-level name of the parent directory's __init__.py into this module
diff --git a/build/torch211-cxx11-cu128-aarch64-linux/__init__.py b/build/torch211-cxx11-cu128-aarch64-linux/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c65d0601c655d7acf1a12e61b6549618b46a70d7
--- /dev/null
+++ b/build/torch211-cxx11-cu128-aarch64-linux/__init__.py
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
diff --git a/build/torch211-cxx11-cu128-aarch64-linux/_ops.py b/build/torch211-cxx11-cu128-aarch64-linux/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..60ecc447c9f7708830d1cc06986da9d62a31bd96
--- /dev/null
+++ b/build/torch211-cxx11-cu128-aarch64-linux/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_cuda_4bc0051
+ops = torch.ops._quantization_eetq_cuda_4bc0051
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_quantization_eetq_cuda_4bc0051::{op_name}"
diff --git a/build/torch211-cxx11-cu128-aarch64-linux/_quantization_eetq_cuda_4bc0051.abi3.so b/build/torch211-cxx11-cu128-aarch64-linux/_quantization_eetq_cuda_4bc0051.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..5de5e2befd073cadcb1dc235e776c18fca1ed06d
--- /dev/null
+++ b/build/torch211-cxx11-cu128-aarch64-linux/_quantization_eetq_cuda_4bc0051.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:894bebb496c1327c2855c6b1c4b362ebde395fbdc2a5050127b20ebd2faa943d
+size 45296712
diff --git a/build/torch211-cxx11-cu128-aarch64-linux/custom_ops.py b/build/torch211-cxx11-cu128-aarch64-linux/custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..005b5a6e3cd5f7bcfd4aa5d7d80d60a5ed9fab88
--- /dev/null
+++ b/build/torch211-cxx11-cu128-aarch64-linux/custom_ops.py
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor
+) -> torch.Tensor:
+    return ops.w8_a16_gemm(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    scale: torch.Tensor,
+    output: torch.Tensor,
+    m: int,
+    n: int,
+    k: int,
+) -> torch.Tensor:
+    return ops.w8_a16_gemm_(input, weight, scale, output, m, n, k)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    return ops.quant_weights(
+        origin_weight, quant_type, return_unprocessed_quantized_tensor
+    )
diff --git a/build/torch211-cxx11-cu128-aarch64-linux/metadata.json b/build/torch211-cxx11-cu128-aarch64-linux/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..8b796af185fbbd8594fcd846949aa5fadc0ccdda
--- /dev/null
+++ b/build/torch211-cxx11-cu128-aarch64-linux/metadata.json
@@ -0,0 +1,21 @@
+{
+  "version": 1,
+  "license": "Apache-2.0",
+  "python-depends": [],
+  "backend": {
+    "type": "cuda",
+    "archs": [
+      "10.0",
+      "10.1",
+      "12.0+PTX",
+      "7.0",
+      "7.2",
+      "7.5",
+      "8.0",
+      "8.6",
+      "8.7",
+      "8.9",
+      "9.0"
+    ]
+  }
+}
diff --git a/build/torch211-cxx11-cu128-aarch64-linux/quantization_eetq/__init__.py b/build/torch211-cxx11-cu128-aarch64-linux/quantization_eetq/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23
--- /dev/null
+++ b/build/torch211-cxx11-cu128-aarch64-linux/quantization_eetq/__init__.py
@@ -0,0 +1,26 @@
+import ctypes
+import importlib.util
+import sys
+from pathlib import Path
+from types import ModuleType
+
+
+def _import_from_path(file_path: Path) -> ModuleType:
+    # We cannot use the module name as-is, after adding it to `sys.modules`,
+    # it would also be used for other imports. So, we make a module name that
+    # depends on the path for it to be unique using the hex-encoded hash of
+    # the path.
+    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
+    module_name = path_hash
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if spec is None:
+        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
+    module = importlib.util.module_from_spec(spec)
+    if module is None:
+        raise ImportError(f"Cannot load module {module_name} from spec")
+    sys.modules[module_name] = module
+    spec.loader.exec_module(module)  # type: ignore
+    return module
+
+
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
diff --git a/build/torch211-cxx11-cu128-x86_64-linux/__init__.py b/build/torch211-cxx11-cu128-x86_64-linux/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c65d0601c655d7acf1a12e61b6549618b46a70d7
--- /dev/null
+++ b/build/torch211-cxx11-cu128-x86_64-linux/__init__.py
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
diff --git a/build/torch211-cxx11-cu128-x86_64-linux/_ops.py b/build/torch211-cxx11-cu128-x86_64-linux/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..60ecc447c9f7708830d1cc06986da9d62a31bd96
--- /dev/null
+++ b/build/torch211-cxx11-cu128-x86_64-linux/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_cuda_4bc0051
+ops = torch.ops._quantization_eetq_cuda_4bc0051
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_quantization_eetq_cuda_4bc0051::{op_name}"
diff --git a/build/torch211-cxx11-cu128-x86_64-linux/_quantization_eetq_cuda_4bc0051.abi3.so b/build/torch211-cxx11-cu128-x86_64-linux/_quantization_eetq_cuda_4bc0051.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..7311eecc011e3fff0cbc77d3bd4485721c9b3f93
--- /dev/null
+++ b/build/torch211-cxx11-cu128-x86_64-linux/_quantization_eetq_cuda_4bc0051.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5782cd3f5577334deb176bd5e7502850d2302ade7b7df9ff9491c011ccba469
+size 45382832
diff --git a/build/torch211-cxx11-cu128-x86_64-linux/custom_ops.py b/build/torch211-cxx11-cu128-x86_64-linux/custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..005b5a6e3cd5f7bcfd4aa5d7d80d60a5ed9fab88
--- /dev/null
+++ b/build/torch211-cxx11-cu128-x86_64-linux/custom_ops.py
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor
+) -> torch.Tensor:
+    return ops.w8_a16_gemm(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    scale: torch.Tensor,
+    output: torch.Tensor,
+    m: int,
+    n: int,
+    k: int,
+) -> torch.Tensor:
+    return ops.w8_a16_gemm_(input, weight, scale, output, m, n, k)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    return ops.quant_weights(
+        origin_weight, quant_type, return_unprocessed_quantized_tensor
+    )
diff --git a/build/torch211-cxx11-cu128-x86_64-linux/metadata.json b/build/torch211-cxx11-cu128-x86_64-linux/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..8b796af185fbbd8594fcd846949aa5fadc0ccdda
--- /dev/null
+++ b/build/torch211-cxx11-cu128-x86_64-linux/metadata.json
@@ -0,0 +1,21 @@
+{
+  "version": 1,
+  "license": "Apache-2.0",
+  "python-depends": [],
+  "backend": {
+    "type": "cuda",
+    "archs": [
+      "10.0",
+      "10.1",
+      "12.0+PTX",
+      "7.0",
+      "7.2",
+      "7.5",
+      "8.0",
+      "8.6",
+      "8.7",
+      "8.9",
+      "9.0"
+    ]
+  }
+}
diff --git a/build/torch211-cxx11-cu128-x86_64-linux/quantization_eetq/__init__.py b/build/torch211-cxx11-cu128-x86_64-linux/quantization_eetq/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23
--- /dev/null
+++ b/build/torch211-cxx11-cu128-x86_64-linux/quantization_eetq/__init__.py
@@ -0,0 +1,26 @@
+import ctypes
+import importlib.util
+import sys
+from pathlib import Path
+from types import ModuleType
+
+
+def _import_from_path(file_path: Path) -> ModuleType:
+    # We cannot use the module name as-is, after adding it to `sys.modules`,
+    # it would also be used for other imports. So, we make a module name that
+    # depends on the path for it to be unique using the hex-encoded hash of
+    # the path.
+    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
+    module_name = path_hash
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if spec is None:
+        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
+    module = importlib.util.module_from_spec(spec)
+    if module is None:
+        raise ImportError(f"Cannot load module {module_name} from spec")
+    sys.modules[module_name] = module
+    spec.loader.exec_module(module)  # type: ignore
+    return module
+
+
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
diff --git a/build/torch26-cxx11-cu118-x86_64-linux/quantization_eetq/__init__.py b/build/torch26-cxx11-cu118-x86_64-linux/quantization_eetq/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c65d0601c655d7acf1a12e61b6549618b46a70d7
--- /dev/null
+++ b/build/torch26-cxx11-cu118-x86_64-linux/quantization_eetq/__init__.py
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
diff --git a/build/torch26-cxx11-cu118-x86_64-linux/quantization_eetq/_ops.py b/build/torch26-cxx11-cu118-x86_64-linux/quantization_eetq/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..e3eba5306869676d333d0aef70d53688a4bccbae
--- /dev/null
+++ b/build/torch26-cxx11-cu118-x86_64-linux/quantization_eetq/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_6337ee0
+ops = torch.ops._quantization_eetq_6337ee0
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_quantization_eetq_6337ee0::{op_name}"
\ No newline at end of file
diff --git a/build/torch26-cxx11-cu118-x86_64-linux/quantization_eetq/_quantization_eetq_6337ee0.abi3.so b/build/torch26-cxx11-cu118-x86_64-linux/quantization_eetq/_quantization_eetq_6337ee0.abi3.so
new file mode 100755
index 0000000000000000000000000000000000000000..a51485489a7880f47b8c4512f61dc8268887de18
--- /dev/null
+++ b/build/torch26-cxx11-cu118-x86_64-linux/quantization_eetq/_quantization_eetq_6337ee0.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:09177b6bbe99ca3dadec283fcd1fbed831b5e150c6afc3ab10704ddceea95576
+size 31359200
diff --git a/build/torch26-cxx11-cu118-x86_64-linux/quantization_eetq/custom_ops.py b/build/torch26-cxx11-cu118-x86_64-linux/quantization_eetq/custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..005b5a6e3cd5f7bcfd4aa5d7d80d60a5ed9fab88
--- /dev/null
+++ b/build/torch26-cxx11-cu118-x86_64-linux/quantization_eetq/custom_ops.py
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor
+) -> torch.Tensor:
+    return ops.w8_a16_gemm(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    scale: torch.Tensor,
+    output: torch.Tensor,
+    m: int,
+    n: int,
+    k: int,
+) -> torch.Tensor:
+    return ops.w8_a16_gemm_(input, weight, scale, output, m, n, k)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    return ops.quant_weights(
+        origin_weight, quant_type, return_unprocessed_quantized_tensor
+    )
diff --git a/build/torch26-cxx11-cu124-x86_64-linux/quantization_eetq/__init__.py b/build/torch26-cxx11-cu124-x86_64-linux/quantization_eetq/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c65d0601c655d7acf1a12e61b6549618b46a70d7
--- /dev/null
+++ b/build/torch26-cxx11-cu124-x86_64-linux/quantization_eetq/__init__.py
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
diff --git a/build/torch26-cxx11-cu124-x86_64-linux/quantization_eetq/_ops.py b/build/torch26-cxx11-cu124-x86_64-linux/quantization_eetq/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..e3eba5306869676d333d0aef70d53688a4bccbae
--- /dev/null
+++ b/build/torch26-cxx11-cu124-x86_64-linux/quantization_eetq/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_6337ee0
+ops = torch.ops._quantization_eetq_6337ee0
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_quantization_eetq_6337ee0::{op_name}"
\ No newline at end of file
diff --git a/build/torch26-cxx11-cu124-x86_64-linux/quantization_eetq/_quantization_eetq_6337ee0.abi3.so b/build/torch26-cxx11-cu124-x86_64-linux/quantization_eetq/_quantization_eetq_6337ee0.abi3.so
new file mode 100755
index 0000000000000000000000000000000000000000..7eb2f8ed3f57890e8bbce63b0c58612535b95eda
--- /dev/null
+++ b/build/torch26-cxx11-cu124-x86_64-linux/quantization_eetq/_quantization_eetq_6337ee0.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:071573e21e6054145a3182514e04116df6866ae748c227fd2ea436153e4c6275
+size 31002600
diff --git a/build/torch26-cxx11-cu124-x86_64-linux/quantization_eetq/custom_ops.py b/build/torch26-cxx11-cu124-x86_64-linux/quantization_eetq/custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..005b5a6e3cd5f7bcfd4aa5d7d80d60a5ed9fab88
--- /dev/null
+++ b/build/torch26-cxx11-cu124-x86_64-linux/quantization_eetq/custom_ops.py
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor
+) -> torch.Tensor:
+    return ops.w8_a16_gemm(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    scale: torch.Tensor,
+    output: torch.Tensor,
+    m: int,
+    n: int,
+    k: int,
+) -> torch.Tensor:
+    return ops.w8_a16_gemm_(input, weight, scale, output, m, n, k)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    return ops.quant_weights(
+        origin_weight, quant_type, return_unprocessed_quantized_tensor
+    )
diff --git a/build/torch26-cxx11-cu126-aarch64-linux/quantization_eetq/__init__.py b/build/torch26-cxx11-cu126-aarch64-linux/quantization_eetq/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c65d0601c655d7acf1a12e61b6549618b46a70d7
--- /dev/null
+++ b/build/torch26-cxx11-cu126-aarch64-linux/quantization_eetq/__init__.py
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
diff --git a/build/torch26-cxx11-cu126-aarch64-linux/quantization_eetq/_ops.py b/build/torch26-cxx11-cu126-aarch64-linux/quantization_eetq/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..e3eba5306869676d333d0aef70d53688a4bccbae
--- /dev/null
+++ b/build/torch26-cxx11-cu126-aarch64-linux/quantization_eetq/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_6337ee0
+ops = torch.ops._quantization_eetq_6337ee0
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_quantization_eetq_6337ee0::{op_name}"
\ No newline at end of file
diff --git a/build/torch26-cxx11-cu126-aarch64-linux/quantization_eetq/_quantization_eetq_6337ee0.abi3.so b/build/torch26-cxx11-cu126-aarch64-linux/quantization_eetq/_quantization_eetq_6337ee0.abi3.so
new file mode 100755
index 0000000000000000000000000000000000000000..6bb2bf97e99c976b8f705cdc70a8d401d0d985af
--- /dev/null
+++ b/build/torch26-cxx11-cu126-aarch64-linux/quantization_eetq/_quantization_eetq_6337ee0.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3c312731f3cfedd24fa364a2ea37ffbdbef283026d9970ece49558ab990b64dd
+size 30886928
diff --git a/build/torch26-cxx11-cu126-aarch64-linux/quantization_eetq/custom_ops.py b/build/torch26-cxx11-cu126-aarch64-linux/quantization_eetq/custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..005b5a6e3cd5f7bcfd4aa5d7d80d60a5ed9fab88
--- /dev/null
+++ b/build/torch26-cxx11-cu126-aarch64-linux/quantization_eetq/custom_ops.py
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor
+) -> torch.Tensor:
+    return ops.w8_a16_gemm(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    scale: torch.Tensor,
+    output: torch.Tensor,
+    m: int,
+    n: int,
+    k: int,
+) -> torch.Tensor:
+    return ops.w8_a16_gemm_(input, weight, scale, output, m, n, k)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    return ops.quant_weights(
+        origin_weight, quant_type, return_unprocessed_quantized_tensor
+    )
diff --git a/build/torch26-cxx11-cu126-x86_64-linux/quantization_eetq/__init__.py b/build/torch26-cxx11-cu126-x86_64-linux/quantization_eetq/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c65d0601c655d7acf1a12e61b6549618b46a70d7
--- /dev/null
+++ b/build/torch26-cxx11-cu126-x86_64-linux/quantization_eetq/__init__.py
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
diff --git a/build/torch26-cxx11-cu126-x86_64-linux/quantization_eetq/_ops.py b/build/torch26-cxx11-cu126-x86_64-linux/quantization_eetq/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..e3eba5306869676d333d0aef70d53688a4bccbae
--- /dev/null
+++ b/build/torch26-cxx11-cu126-x86_64-linux/quantization_eetq/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_6337ee0
+ops = torch.ops._quantization_eetq_6337ee0
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_quantization_eetq_6337ee0::{op_name}"
\ No newline at end of file
diff --git a/build/torch26-cxx11-cu126-x86_64-linux/quantization_eetq/_quantization_eetq_6337ee0.abi3.so b/build/torch26-cxx11-cu126-x86_64-linux/quantization_eetq/_quantization_eetq_6337ee0.abi3.so
new file mode 100755
index 0000000000000000000000000000000000000000..c3714edd48c1fd5a30b4e94d3542b0c76872264c
--- /dev/null
+++ b/build/torch26-cxx11-cu126-x86_64-linux/quantization_eetq/_quantization_eetq_6337ee0.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0910d73c92282fee4a95a03b12f5c7f5a014d27e452deed21425c6c959da509a
+size 31029576
diff --git a/build/torch26-cxx11-cu126-x86_64-linux/quantization_eetq/custom_ops.py b/build/torch26-cxx11-cu126-x86_64-linux/quantization_eetq/custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..005b5a6e3cd5f7bcfd4aa5d7d80d60a5ed9fab88
--- /dev/null
+++ b/build/torch26-cxx11-cu126-x86_64-linux/quantization_eetq/custom_ops.py
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor
+) -> torch.Tensor:
+    return ops.w8_a16_gemm(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    scale: torch.Tensor,
+    output: torch.Tensor,
+    m: int,
+    n: int,
+    k: int,
+) -> torch.Tensor:
+    return ops.w8_a16_gemm_(input, weight, scale, output, m, n, k)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    return ops.quant_weights(
+        origin_weight, quant_type, return_unprocessed_quantized_tensor
+    )
diff --git a/build/torch26-cxx98-cu118-x86_64-linux/quantization_eetq/__init__.py b/build/torch26-cxx98-cu118-x86_64-linux/quantization_eetq/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c65d0601c655d7acf1a12e61b6549618b46a70d7
--- /dev/null
+++ b/build/torch26-cxx98-cu118-x86_64-linux/quantization_eetq/__init__.py
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
diff --git a/build/torch26-cxx98-cu118-x86_64-linux/quantization_eetq/_ops.py b/build/torch26-cxx98-cu118-x86_64-linux/quantization_eetq/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..e3eba5306869676d333d0aef70d53688a4bccbae
--- /dev/null
+++ b/build/torch26-cxx98-cu118-x86_64-linux/quantization_eetq/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_6337ee0
+ops = torch.ops._quantization_eetq_6337ee0
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_quantization_eetq_6337ee0::{op_name}"
\ No newline at end of file
diff --git a/build/torch26-cxx98-cu118-x86_64-linux/quantization_eetq/_quantization_eetq_6337ee0.abi3.so b/build/torch26-cxx98-cu118-x86_64-linux/quantization_eetq/_quantization_eetq_6337ee0.abi3.so
new file mode 100755
index 0000000000000000000000000000000000000000..1453f92ff817008a7381a36de2325d1dffb52a5d
--- /dev/null
+++ b/build/torch26-cxx98-cu118-x86_64-linux/quantization_eetq/_quantization_eetq_6337ee0.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fce19321affc8e4734484925ac93862ba41fa5d192ef133710c07e82f1e344e4
+size 31354584
diff --git a/build/torch26-cxx98-cu118-x86_64-linux/quantization_eetq/custom_ops.py b/build/torch26-cxx98-cu118-x86_64-linux/quantization_eetq/custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..005b5a6e3cd5f7bcfd4aa5d7d80d60a5ed9fab88
--- /dev/null
+++ b/build/torch26-cxx98-cu118-x86_64-linux/quantization_eetq/custom_ops.py
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor
+) -> torch.Tensor:
+    return ops.w8_a16_gemm(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    scale: torch.Tensor,
+    output: torch.Tensor,
+    m: int,
+    n: int,
+    k: int,
+) -> torch.Tensor:
+    return ops.w8_a16_gemm_(input, weight, scale, output, m, n, k)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    return ops.quant_weights(
+        origin_weight, quant_type, return_unprocessed_quantized_tensor
+    )
diff --git a/build/torch26-cxx98-cu124-x86_64-linux/quantization_eetq/__init__.py b/build/torch26-cxx98-cu124-x86_64-linux/quantization_eetq/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c65d0601c655d7acf1a12e61b6549618b46a70d7
--- /dev/null
+++ b/build/torch26-cxx98-cu124-x86_64-linux/quantization_eetq/__init__.py
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
diff --git a/build/torch26-cxx98-cu124-x86_64-linux/quantization_eetq/_ops.py b/build/torch26-cxx98-cu124-x86_64-linux/quantization_eetq/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..e3eba5306869676d333d0aef70d53688a4bccbae
--- /dev/null
+++ b/build/torch26-cxx98-cu124-x86_64-linux/quantization_eetq/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_6337ee0
+ops = torch.ops._quantization_eetq_6337ee0
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_quantization_eetq_6337ee0::{op_name}"
\ No newline at end of file
diff --git a/build/torch26-cxx98-cu124-x86_64-linux/quantization_eetq/_quantization_eetq_6337ee0.abi3.so b/build/torch26-cxx98-cu124-x86_64-linux/quantization_eetq/_quantization_eetq_6337ee0.abi3.so
new file mode 100755
index 0000000000000000000000000000000000000000..b51b00d454a66090799dd41cb701870111c34237
--- /dev/null
+++ b/build/torch26-cxx98-cu124-x86_64-linux/quantization_eetq/_quantization_eetq_6337ee0.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7315f176ff319da68450bc7396fada660484741687b7b0c907baf4520c511219
+size 31166344
diff --git a/build/torch26-cxx98-cu124-x86_64-linux/quantization_eetq/custom_ops.py b/build/torch26-cxx98-cu124-x86_64-linux/quantization_eetq/custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..005b5a6e3cd5f7bcfd4aa5d7d80d60a5ed9fab88
--- /dev/null
+++ b/build/torch26-cxx98-cu124-x86_64-linux/quantization_eetq/custom_ops.py
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor
+) -> torch.Tensor:
+    return ops.w8_a16_gemm(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    scale: torch.Tensor,
+    output: torch.Tensor,
+    m: int,
+    n: int,
+    k: int,
+) -> torch.Tensor:
+    return ops.w8_a16_gemm_(input, weight, scale, output, m, n, k)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    return ops.quant_weights(
+        origin_weight, quant_type, return_unprocessed_quantized_tensor
+    )
diff --git a/build/torch26-cxx98-cu126-aarch64-linux/quantization_eetq/__init__.py b/build/torch26-cxx98-cu126-aarch64-linux/quantization_eetq/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c65d0601c655d7acf1a12e61b6549618b46a70d7
--- /dev/null
+++ b/build/torch26-cxx98-cu126-aarch64-linux/quantization_eetq/__init__.py
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
diff --git a/build/torch26-cxx98-cu126-aarch64-linux/quantization_eetq/_ops.py b/build/torch26-cxx98-cu126-aarch64-linux/quantization_eetq/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..e3eba5306869676d333d0aef70d53688a4bccbae
--- /dev/null
+++ b/build/torch26-cxx98-cu126-aarch64-linux/quantization_eetq/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_6337ee0
+ops = torch.ops._quantization_eetq_6337ee0
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_quantization_eetq_6337ee0::{op_name}"
\ No newline at end of file
diff --git a/build/torch26-cxx98-cu126-aarch64-linux/quantization_eetq/_quantization_eetq_6337ee0.abi3.so b/build/torch26-cxx98-cu126-aarch64-linux/quantization_eetq/_quantization_eetq_6337ee0.abi3.so
new file mode 100755
index 0000000000000000000000000000000000000000..abc8c351578fbc199601a6076e64b79704792cba
--- /dev/null
+++ b/build/torch26-cxx98-cu126-aarch64-linux/quantization_eetq/_quantization_eetq_6337ee0.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4aa8aceae17bbed8be4287c4cbc01f8249b385e260a139172a34aa12efecb89
+size 31083056
diff --git a/build/torch26-cxx98-cu126-aarch64-linux/quantization_eetq/custom_ops.py b/build/torch26-cxx98-cu126-aarch64-linux/quantization_eetq/custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..005b5a6e3cd5f7bcfd4aa5d7d80d60a5ed9fab88
--- /dev/null
+++ b/build/torch26-cxx98-cu126-aarch64-linux/quantization_eetq/custom_ops.py
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor
+) -> torch.Tensor:
+    return ops.w8_a16_gemm(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    scale: torch.Tensor,
+    output: torch.Tensor,
+    m: int,
+    n: int,
+    k: int,
+) -> torch.Tensor:
+    return ops.w8_a16_gemm_(input, weight, scale, output, m, n, k)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    return ops.quant_weights(
+        origin_weight, quant_type, return_unprocessed_quantized_tensor
+    )
diff --git a/build/torch26-cxx98-cu126-x86_64-linux/quantization_eetq/__init__.py b/build/torch26-cxx98-cu126-x86_64-linux/quantization_eetq/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c65d0601c655d7acf1a12e61b6549618b46a70d7
--- /dev/null
+++ b/build/torch26-cxx98-cu126-x86_64-linux/quantization_eetq/__init__.py
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
diff --git a/build/torch26-cxx98-cu126-x86_64-linux/quantization_eetq/_ops.py b/build/torch26-cxx98-cu126-x86_64-linux/quantization_eetq/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..e3eba5306869676d333d0aef70d53688a4bccbae
--- /dev/null
+++ b/build/torch26-cxx98-cu126-x86_64-linux/quantization_eetq/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_6337ee0
+ops = torch.ops._quantization_eetq_6337ee0
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_quantization_eetq_6337ee0::{op_name}"
\ No newline at end of file
diff --git a/build/torch26-cxx98-cu126-x86_64-linux/quantization_eetq/_quantization_eetq_6337ee0.abi3.so b/build/torch26-cxx98-cu126-x86_64-linux/quantization_eetq/_quantization_eetq_6337ee0.abi3.so
new file mode 100755
index 0000000000000000000000000000000000000000..817fb93514eb52be07446183f00a2bcd0c73d459
--- /dev/null
+++ b/build/torch26-cxx98-cu126-x86_64-linux/quantization_eetq/_quantization_eetq_6337ee0.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c98e478af0de310d3588e72c9cb10e7c787413f3caae65843df3f6fa16a473e2
+size 31189216
diff --git a/build/torch26-cxx98-cu126-x86_64-linux/quantization_eetq/custom_ops.py b/build/torch26-cxx98-cu126-x86_64-linux/quantization_eetq/custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..005b5a6e3cd5f7bcfd4aa5d7d80d60a5ed9fab88
--- /dev/null
+++ b/build/torch26-cxx98-cu126-x86_64-linux/quantization_eetq/custom_ops.py
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(input: torch.Tensor, weight: torch.Tensor,
+                scale: torch.Tensor) -> torch.Tensor:
+    """Run the extension's ``w8_a16_gemm`` op on the three tensors and return its result."""
+    return ops.w8_a16_gemm(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor,
+    output: torch.Tensor, m: int, n: int, k: int,
+) -> torch.Tensor:
+    """Call the extension's ``w8_a16_gemm_`` op with a caller-supplied ``output``.
+
+    NOTE(review): presumably the op writes into ``output`` and ``m``/``n``/``k``
+    are the GEMM dimensions -- confirm against the kernel source.
+    """
+    return ops.w8_a16_gemm_(input, weight, scale, output, m, n, k)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)  # forwarded unchanged to the extension op
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    """Call the extension's ``quant_weights`` op and return the tensors it yields."""
+    op_args = (origin_weight, quant_type, return_unprocessed_quantized_tensor)
+    return ops.quant_weights(*op_args)
diff --git a/build/torch27-cxx11-cu118-x86_64-linux/quantization_eetq/__init__.py b/build/torch27-cxx11-cu118-x86_64-linux/quantization_eetq/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c65d0601c655d7acf1a12e61b6549618b46a70d7
--- /dev/null
+++ b/build/torch27-cxx11-cu118-x86_64-linux/quantization_eetq/__init__.py
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
diff --git a/build/torch27-cxx11-cu118-x86_64-linux/quantization_eetq/__pycache__/__init__.cpython-313.pyc b/build/torch27-cxx11-cu118-x86_64-linux/quantization_eetq/__pycache__/__init__.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..690365a76d26633adfc3f1baf2d7163a96a9ea56
Binary files /dev/null and b/build/torch27-cxx11-cu118-x86_64-linux/quantization_eetq/__pycache__/__init__.cpython-313.pyc differ
diff --git a/build/torch27-cxx11-cu118-x86_64-linux/quantization_eetq/__pycache__/_ops.cpython-313.pyc b/build/torch27-cxx11-cu118-x86_64-linux/quantization_eetq/__pycache__/_ops.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..77e3fdadbe96d67af23bf70c0261f1971e112679
Binary files /dev/null and b/build/torch27-cxx11-cu118-x86_64-linux/quantization_eetq/__pycache__/_ops.cpython-313.pyc differ
diff --git a/build/torch27-cxx11-cu118-x86_64-linux/quantization_eetq/__pycache__/custom_ops.cpython-313.pyc b/build/torch27-cxx11-cu118-x86_64-linux/quantization_eetq/__pycache__/custom_ops.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..285a1c499617133099e61e4188fde71dd8d203c5
Binary files /dev/null and b/build/torch27-cxx11-cu118-x86_64-linux/quantization_eetq/__pycache__/custom_ops.cpython-313.pyc differ
diff --git a/build/torch27-cxx11-cu118-x86_64-linux/quantization_eetq/_ops.py b/build/torch27-cxx11-cu118-x86_64-linux/quantization_eetq/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..7ca134cca31c3e4f6dbc2bda32496668ed8061bb
--- /dev/null
+++ b/build/torch27-cxx11-cu118-x86_64-linux/quantization_eetq/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_92f3139
+ops = torch.ops._quantization_eetq_92f3139
+
+def add_op_namespace_prefix(op_name: str):
+    """Return ``op_name`` qualified with this build's op namespace."""
+    # The namespace matches the compiled extension module imported above.
+    qualified = f"_quantization_eetq_92f3139::{op_name}"
+    return qualified
\ No newline at end of file
diff --git a/build/torch27-cxx11-cu118-x86_64-linux/quantization_eetq/_quantization_eetq_92f3139.abi3.so b/build/torch27-cxx11-cu118-x86_64-linux/quantization_eetq/_quantization_eetq_92f3139.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..d96419978cfca526fdeeed071e790fe8a502442b
--- /dev/null
+++ b/build/torch27-cxx11-cu118-x86_64-linux/quantization_eetq/_quantization_eetq_92f3139.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a87d3bf237f1e897b2b58d95a686ee4b5584329b6d04243eaf970681be00bb67
+size 31977872
diff --git a/build/torch27-cxx11-cu118-x86_64-linux/quantization_eetq/custom_ops.py b/build/torch27-cxx11-cu118-x86_64-linux/quantization_eetq/custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..005b5a6e3cd5f7bcfd4aa5d7d80d60a5ed9fab88
--- /dev/null
+++ b/build/torch27-cxx11-cu118-x86_64-linux/quantization_eetq/custom_ops.py
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(input: torch.Tensor, weight: torch.Tensor,
+                scale: torch.Tensor) -> torch.Tensor:
+    """Run the extension's ``w8_a16_gemm`` op on the three tensors and return its result."""
+    return ops.w8_a16_gemm(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor,
+    output: torch.Tensor, m: int, n: int, k: int,
+) -> torch.Tensor:
+    """Call the extension's ``w8_a16_gemm_`` op with a caller-supplied ``output``.
+
+    NOTE(review): presumably the op writes into ``output`` and ``m``/``n``/``k``
+    are the GEMM dimensions -- confirm against the kernel source.
+    """
+    return ops.w8_a16_gemm_(input, weight, scale, output, m, n, k)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)  # forwarded unchanged to the extension op
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    """Call the extension's ``quant_weights`` op and return the tensors it yields."""
+    op_args = (origin_weight, quant_type, return_unprocessed_quantized_tensor)
+    return ops.quant_weights(*op_args)
diff --git a/build/torch27-cxx11-cu126-aarch64-linux/quantization_eetq/__init__.py b/build/torch27-cxx11-cu126-aarch64-linux/quantization_eetq/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c65d0601c655d7acf1a12e61b6549618b46a70d7
--- /dev/null
+++ b/build/torch27-cxx11-cu126-aarch64-linux/quantization_eetq/__init__.py
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
diff --git a/build/torch27-cxx11-cu126-aarch64-linux/quantization_eetq/_ops.py b/build/torch27-cxx11-cu126-aarch64-linux/quantization_eetq/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..e3eba5306869676d333d0aef70d53688a4bccbae
--- /dev/null
+++ b/build/torch27-cxx11-cu126-aarch64-linux/quantization_eetq/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_6337ee0
+ops = torch.ops._quantization_eetq_6337ee0
+
+def add_op_namespace_prefix(op_name: str):
+    """Return ``op_name`` qualified with this build's op namespace."""
+    # The namespace matches the compiled extension module imported above.
+    qualified = f"_quantization_eetq_6337ee0::{op_name}"
+    return qualified
\ No newline at end of file
diff --git a/build/torch27-cxx11-cu126-aarch64-linux/quantization_eetq/_quantization_eetq_6337ee0.abi3.so b/build/torch27-cxx11-cu126-aarch64-linux/quantization_eetq/_quantization_eetq_6337ee0.abi3.so
new file mode 100755
index 0000000000000000000000000000000000000000..2e5f6a843de1b5a81cf5fb679058f63e63f17224
--- /dev/null
+++ b/build/torch27-cxx11-cu126-aarch64-linux/quantization_eetq/_quantization_eetq_6337ee0.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85f2f79a15fee959addb101180c59e6f67f9739b795d6bf5ce696db9760b2807
+size 30887136
diff --git a/build/torch27-cxx11-cu126-aarch64-linux/quantization_eetq/custom_ops.py b/build/torch27-cxx11-cu126-aarch64-linux/quantization_eetq/custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..005b5a6e3cd5f7bcfd4aa5d7d80d60a5ed9fab88
--- /dev/null
+++ b/build/torch27-cxx11-cu126-aarch64-linux/quantization_eetq/custom_ops.py
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(input: torch.Tensor, weight: torch.Tensor,
+                scale: torch.Tensor) -> torch.Tensor:
+    """Run the extension's ``w8_a16_gemm`` op on the three tensors and return its result."""
+    return ops.w8_a16_gemm(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor,
+    output: torch.Tensor, m: int, n: int, k: int,
+) -> torch.Tensor:
+    """Call the extension's ``w8_a16_gemm_`` op with a caller-supplied ``output``.
+
+    NOTE(review): presumably the op writes into ``output`` and ``m``/``n``/``k``
+    are the GEMM dimensions -- confirm against the kernel source.
+    """
+    return ops.w8_a16_gemm_(input, weight, scale, output, m, n, k)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)  # forwarded unchanged to the extension op
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    """Call the extension's ``quant_weights`` op and return the tensors it yields."""
+    op_args = (origin_weight, quant_type, return_unprocessed_quantized_tensor)
+    return ops.quant_weights(*op_args)
diff --git a/build/torch27-cxx11-cu126-x86_64-linux/quantization_eetq/__init__.py b/build/torch27-cxx11-cu126-x86_64-linux/quantization_eetq/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c65d0601c655d7acf1a12e61b6549618b46a70d7
--- /dev/null
+++ b/build/torch27-cxx11-cu126-x86_64-linux/quantization_eetq/__init__.py
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
diff --git a/build/torch27-cxx11-cu126-x86_64-linux/quantization_eetq/__pycache__/__init__.cpython-313.pyc b/build/torch27-cxx11-cu126-x86_64-linux/quantization_eetq/__pycache__/__init__.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c7ef1e106828b07199e27dc3db211a9192f76995
Binary files /dev/null and b/build/torch27-cxx11-cu126-x86_64-linux/quantization_eetq/__pycache__/__init__.cpython-313.pyc differ
diff --git a/build/torch27-cxx11-cu126-x86_64-linux/quantization_eetq/__pycache__/_ops.cpython-313.pyc b/build/torch27-cxx11-cu126-x86_64-linux/quantization_eetq/__pycache__/_ops.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4e1bf83fa8eee688a00a7b568f1bc0c6eada661c
Binary files /dev/null and b/build/torch27-cxx11-cu126-x86_64-linux/quantization_eetq/__pycache__/_ops.cpython-313.pyc differ
diff --git a/build/torch27-cxx11-cu126-x86_64-linux/quantization_eetq/__pycache__/custom_ops.cpython-313.pyc b/build/torch27-cxx11-cu126-x86_64-linux/quantization_eetq/__pycache__/custom_ops.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..db8ca6cfc35062c0f72aca34e7f5c8aa79badba6
Binary files /dev/null and b/build/torch27-cxx11-cu126-x86_64-linux/quantization_eetq/__pycache__/custom_ops.cpython-313.pyc differ
diff --git a/build/torch27-cxx11-cu126-x86_64-linux/quantization_eetq/_ops.py b/build/torch27-cxx11-cu126-x86_64-linux/quantization_eetq/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..7ca134cca31c3e4f6dbc2bda32496668ed8061bb
--- /dev/null
+++ b/build/torch27-cxx11-cu126-x86_64-linux/quantization_eetq/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_92f3139
+ops = torch.ops._quantization_eetq_92f3139
+
+def add_op_namespace_prefix(op_name: str):
+    """Return ``op_name`` qualified with this build's op namespace."""
+    # The namespace matches the compiled extension module imported above.
+    qualified = f"_quantization_eetq_92f3139::{op_name}"
+    return qualified
\ No newline at end of file
diff --git a/build/torch27-cxx11-cu126-x86_64-linux/quantization_eetq/_quantization_eetq_92f3139.abi3.so b/build/torch27-cxx11-cu126-x86_64-linux/quantization_eetq/_quantization_eetq_92f3139.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..ab97a6a7bf6b48b1d51bad46c7569d8748669c51
--- /dev/null
+++ b/build/torch27-cxx11-cu126-x86_64-linux/quantization_eetq/_quantization_eetq_92f3139.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a0d40ed3d499de55afde610e6bdb0c08e3ed40128c6f82984a1fdfbd2a5baae
+size 31639912
diff --git a/build/torch27-cxx11-cu126-x86_64-linux/quantization_eetq/custom_ops.py b/build/torch27-cxx11-cu126-x86_64-linux/quantization_eetq/custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..005b5a6e3cd5f7bcfd4aa5d7d80d60a5ed9fab88
--- /dev/null
+++ b/build/torch27-cxx11-cu126-x86_64-linux/quantization_eetq/custom_ops.py
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(input: torch.Tensor, weight: torch.Tensor,
+                scale: torch.Tensor) -> torch.Tensor:
+    """Run the extension's ``w8_a16_gemm`` op on the three tensors and return its result."""
+    return ops.w8_a16_gemm(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor,
+    output: torch.Tensor, m: int, n: int, k: int,
+) -> torch.Tensor:
+    """Call the extension's ``w8_a16_gemm_`` op with a caller-supplied ``output``.
+
+    NOTE(review): presumably the op writes into ``output`` and ``m``/``n``/``k``
+    are the GEMM dimensions -- confirm against the kernel source.
+    """
+    return ops.w8_a16_gemm_(input, weight, scale, output, m, n, k)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)  # forwarded unchanged to the extension op
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    """Call the extension's ``quant_weights`` op and return the tensors it yields."""
+    op_args = (origin_weight, quant_type, return_unprocessed_quantized_tensor)
+    return ops.quant_weights(*op_args)
diff --git a/build/torch27-cxx11-cu128-aarch64-linux/quantization_eetq/__init__.py b/build/torch27-cxx11-cu128-aarch64-linux/quantization_eetq/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c65d0601c655d7acf1a12e61b6549618b46a70d7
--- /dev/null
+++ b/build/torch27-cxx11-cu128-aarch64-linux/quantization_eetq/__init__.py
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
diff --git a/build/torch27-cxx11-cu128-aarch64-linux/quantization_eetq/__pycache__/__init__.cpython-313.pyc b/build/torch27-cxx11-cu128-aarch64-linux/quantization_eetq/__pycache__/__init__.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..47c6cd9d0f0aa7d5a18bea9830f52f8040f152d4
Binary files /dev/null and b/build/torch27-cxx11-cu128-aarch64-linux/quantization_eetq/__pycache__/__init__.cpython-313.pyc differ
diff --git a/build/torch27-cxx11-cu128-aarch64-linux/quantization_eetq/__pycache__/_ops.cpython-313.pyc b/build/torch27-cxx11-cu128-aarch64-linux/quantization_eetq/__pycache__/_ops.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..77ece60ca504b3fe50fbc49221d65d5f2c93dd58
Binary files /dev/null and b/build/torch27-cxx11-cu128-aarch64-linux/quantization_eetq/__pycache__/_ops.cpython-313.pyc differ
diff --git a/build/torch27-cxx11-cu128-aarch64-linux/quantization_eetq/__pycache__/custom_ops.cpython-313.pyc b/build/torch27-cxx11-cu128-aarch64-linux/quantization_eetq/__pycache__/custom_ops.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..716a5880a810e81b8bf029320a6f81aeaae0b541
Binary files /dev/null and b/build/torch27-cxx11-cu128-aarch64-linux/quantization_eetq/__pycache__/custom_ops.cpython-313.pyc differ
diff --git a/build/torch27-cxx11-cu128-aarch64-linux/quantization_eetq/_ops.py b/build/torch27-cxx11-cu128-aarch64-linux/quantization_eetq/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..36f39b4d4fa79b4a8a6bbf9181cd71831f70a633
--- /dev/null
+++ b/build/torch27-cxx11-cu128-aarch64-linux/quantization_eetq/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_ee9ac42
+ops = torch.ops._quantization_eetq_ee9ac42
+
+def add_op_namespace_prefix(op_name: str):
+    """Return ``op_name`` qualified with this build's op namespace."""
+    # The namespace matches the compiled extension module imported above.
+    qualified = f"_quantization_eetq_ee9ac42::{op_name}"
+    return qualified
\ No newline at end of file
diff --git a/build/torch27-cxx11-cu128-aarch64-linux/quantization_eetq/_quantization_eetq_ee9ac42.abi3.so b/build/torch27-cxx11-cu128-aarch64-linux/quantization_eetq/_quantization_eetq_ee9ac42.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..f20ed8218a942ae54455c6aaed44e8820e1dc808
--- /dev/null
+++ b/build/torch27-cxx11-cu128-aarch64-linux/quantization_eetq/_quantization_eetq_ee9ac42.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c32dd6d144418fd7255d5555b634407890ad96b298c98a00bb8463d6f6dd9be
+size 38028936
diff --git a/build/torch27-cxx11-cu128-aarch64-linux/quantization_eetq/custom_ops.py b/build/torch27-cxx11-cu128-aarch64-linux/quantization_eetq/custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..005b5a6e3cd5f7bcfd4aa5d7d80d60a5ed9fab88
--- /dev/null
+++ b/build/torch27-cxx11-cu128-aarch64-linux/quantization_eetq/custom_ops.py
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(input: torch.Tensor, weight: torch.Tensor,
+                scale: torch.Tensor) -> torch.Tensor:
+    """Run the extension's ``w8_a16_gemm`` op on the three tensors and return its result."""
+    return ops.w8_a16_gemm(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor,
+    output: torch.Tensor, m: int, n: int, k: int,
+) -> torch.Tensor:
+    """Call the extension's ``w8_a16_gemm_`` op with a caller-supplied ``output``.
+
+    NOTE(review): presumably the op writes into ``output`` and ``m``/``n``/``k``
+    are the GEMM dimensions -- confirm against the kernel source.
+    """
+    return ops.w8_a16_gemm_(input, weight, scale, output, m, n, k)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)  # forwarded unchanged to the extension op
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    """Call the extension's ``quant_weights`` op and return the tensors it yields."""
+    op_args = (origin_weight, quant_type, return_unprocessed_quantized_tensor)
+    return ops.quant_weights(*op_args)
diff --git a/build/torch27-cxx11-cu128-x86_64-linux/quantization_eetq/__init__.py b/build/torch27-cxx11-cu128-x86_64-linux/quantization_eetq/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c65d0601c655d7acf1a12e61b6549618b46a70d7
--- /dev/null
+++ b/build/torch27-cxx11-cu128-x86_64-linux/quantization_eetq/__init__.py
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
diff --git a/build/torch27-cxx11-cu128-x86_64-linux/quantization_eetq/__pycache__/__init__.cpython-313.pyc b/build/torch27-cxx11-cu128-x86_64-linux/quantization_eetq/__pycache__/__init__.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2c33283dc44b927628a3af9ca2f19232b93e1ffa
Binary files /dev/null and b/build/torch27-cxx11-cu128-x86_64-linux/quantization_eetq/__pycache__/__init__.cpython-313.pyc differ
diff --git a/build/torch27-cxx11-cu128-x86_64-linux/quantization_eetq/__pycache__/_ops.cpython-313.pyc b/build/torch27-cxx11-cu128-x86_64-linux/quantization_eetq/__pycache__/_ops.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ca9825e4a92eb9b7a6ca5cdd16a15312dd852bb1
Binary files /dev/null and b/build/torch27-cxx11-cu128-x86_64-linux/quantization_eetq/__pycache__/_ops.cpython-313.pyc differ
diff --git a/build/torch27-cxx11-cu128-x86_64-linux/quantization_eetq/__pycache__/custom_ops.cpython-313.pyc b/build/torch27-cxx11-cu128-x86_64-linux/quantization_eetq/__pycache__/custom_ops.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c7a8ce97732c00a2ce8bcc355bf57cfe284761b7
Binary files /dev/null and b/build/torch27-cxx11-cu128-x86_64-linux/quantization_eetq/__pycache__/custom_ops.cpython-313.pyc differ
diff --git a/build/torch27-cxx11-cu128-x86_64-linux/quantization_eetq/_ops.py b/build/torch27-cxx11-cu128-x86_64-linux/quantization_eetq/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..7ca134cca31c3e4f6dbc2bda32496668ed8061bb
--- /dev/null
+++ b/build/torch27-cxx11-cu128-x86_64-linux/quantization_eetq/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_92f3139
+ops = torch.ops._quantization_eetq_92f3139
+
+def add_op_namespace_prefix(op_name: str):
+    """Return ``op_name`` qualified with this build's op namespace."""
+    # The namespace matches the compiled extension module imported above.
+    qualified = f"_quantization_eetq_92f3139::{op_name}"
+    return qualified
\ No newline at end of file
diff --git a/build/torch27-cxx11-cu128-x86_64-linux/quantization_eetq/_quantization_eetq_92f3139.abi3.so b/build/torch27-cxx11-cu128-x86_64-linux/quantization_eetq/_quantization_eetq_92f3139.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..2d0a22e88f35ace9fc27e20d02197486b439fe67
--- /dev/null
+++ b/build/torch27-cxx11-cu128-x86_64-linux/quantization_eetq/_quantization_eetq_92f3139.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb04c8f5d3cb36f8566d702e184f66277061bbbc2a72693bdfdb1182e2516125
+size 38054616
diff --git a/build/torch27-cxx11-cu128-x86_64-linux/quantization_eetq/custom_ops.py b/build/torch27-cxx11-cu128-x86_64-linux/quantization_eetq/custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..005b5a6e3cd5f7bcfd4aa5d7d80d60a5ed9fab88
--- /dev/null
+++ b/build/torch27-cxx11-cu128-x86_64-linux/quantization_eetq/custom_ops.py
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(input: torch.Tensor, weight: torch.Tensor,
+                scale: torch.Tensor) -> torch.Tensor:
+    """Run the extension's ``w8_a16_gemm`` op on the three tensors and return its result."""
+    return ops.w8_a16_gemm(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor,
+    output: torch.Tensor, m: int, n: int, k: int,
+) -> torch.Tensor:
+    """Call the extension's ``w8_a16_gemm_`` op with a caller-supplied ``output``.
+
+    NOTE(review): presumably the op writes into ``output`` and ``m``/``n``/``k``
+    are the GEMM dimensions -- confirm against the kernel source.
+    """
+    return ops.w8_a16_gemm_(input, weight, scale, output, m, n, k)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)  # forwarded unchanged to the extension op
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    """Call the extension's ``quant_weights`` op and return the tensors it yields."""
+    op_args = (origin_weight, quant_type, return_unprocessed_quantized_tensor)
+    return ops.quant_weights(*op_args)
diff --git a/build/torch28-cxx11-cu126-aarch64-linux/quantization_eetq/__init__.py b/build/torch28-cxx11-cu126-aarch64-linux/quantization_eetq/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c65d0601c655d7acf1a12e61b6549618b46a70d7
--- /dev/null
+++ b/build/torch28-cxx11-cu126-aarch64-linux/quantization_eetq/__init__.py
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
diff --git a/build/torch28-cxx11-cu126-aarch64-linux/quantization_eetq/__pycache__/__init__.cpython-313.pyc b/build/torch28-cxx11-cu126-aarch64-linux/quantization_eetq/__pycache__/__init__.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8227ca00f8cbcce3782760c6048df9807fe65a03
Binary files /dev/null and b/build/torch28-cxx11-cu126-aarch64-linux/quantization_eetq/__pycache__/__init__.cpython-313.pyc differ
diff --git a/build/torch28-cxx11-cu126-aarch64-linux/quantization_eetq/__pycache__/_ops.cpython-313.pyc b/build/torch28-cxx11-cu126-aarch64-linux/quantization_eetq/__pycache__/_ops.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a49f22ad0e44d80e12c52e37b19610454845913c
Binary files /dev/null and b/build/torch28-cxx11-cu126-aarch64-linux/quantization_eetq/__pycache__/_ops.cpython-313.pyc differ
diff --git a/build/torch28-cxx11-cu126-aarch64-linux/quantization_eetq/__pycache__/custom_ops.cpython-313.pyc b/build/torch28-cxx11-cu126-aarch64-linux/quantization_eetq/__pycache__/custom_ops.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1295452813ceab08f3b02fbd71e2f2e1855a1851
Binary files /dev/null and b/build/torch28-cxx11-cu126-aarch64-linux/quantization_eetq/__pycache__/custom_ops.cpython-313.pyc differ
diff --git a/build/torch28-cxx11-cu126-aarch64-linux/quantization_eetq/_ops.py b/build/torch28-cxx11-cu126-aarch64-linux/quantization_eetq/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..ca4f55199f0790796ac528270956a3046014573c
--- /dev/null
+++ b/build/torch28-cxx11-cu126-aarch64-linux/quantization_eetq/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_1d4b892
+ops = torch.ops._quantization_eetq_1d4b892
+
+def add_op_namespace_prefix(op_name: str):
+    """Return ``op_name`` qualified with this build's op namespace."""
+    # The namespace matches the compiled extension module imported above.
+    qualified = f"_quantization_eetq_1d4b892::{op_name}"
+    return qualified
\ No newline at end of file
diff --git a/build/torch28-cxx11-cu126-aarch64-linux/quantization_eetq/_quantization_eetq_1d4b892.abi3.so b/build/torch28-cxx11-cu126-aarch64-linux/quantization_eetq/_quantization_eetq_1d4b892.abi3.so
new file mode 100755
index 0000000000000000000000000000000000000000..f30efb3c50ffc04c5dd424f5ca0207c7e4a0ec9f
--- /dev/null
+++ b/build/torch28-cxx11-cu126-aarch64-linux/quantization_eetq/_quantization_eetq_1d4b892.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5df285bd2f19db1923189158250f6aa42e48caf8e4a4a9b9a36fc463fd0cdf4c
+size 30814608
diff --git a/build/torch28-cxx11-cu126-aarch64-linux/quantization_eetq/custom_ops.py b/build/torch28-cxx11-cu126-aarch64-linux/quantization_eetq/custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..005b5a6e3cd5f7bcfd4aa5d7d80d60a5ed9fab88
--- /dev/null
+++ b/build/torch28-cxx11-cu126-aarch64-linux/quantization_eetq/custom_ops.py
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(input: torch.Tensor, weight: torch.Tensor,
+                scale: torch.Tensor) -> torch.Tensor:
+    """Run the extension's ``w8_a16_gemm`` op on the three tensors and return its result."""
+    return ops.w8_a16_gemm(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor,
+    output: torch.Tensor, m: int, n: int, k: int,
+) -> torch.Tensor:
+    """Call the extension's ``w8_a16_gemm_`` op with a caller-supplied ``output``.
+
+    NOTE(review): presumably the op writes into ``output`` and ``m``/``n``/``k``
+    are the GEMM dimensions -- confirm against the kernel source.
+    """
+    return ops.w8_a16_gemm_(input, weight, scale, output, m, n, k)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)  # forwarded unchanged to the extension op
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    """Call the extension's ``quant_weights`` op and return the tensors it yields."""
+    op_args = (origin_weight, quant_type, return_unprocessed_quantized_tensor)
+    return ops.quant_weights(*op_args)
diff --git a/build/torch28-cxx11-cu126-x86_64-linux/__init__.py b/build/torch28-cxx11-cu126-x86_64-linux/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c65d0601c655d7acf1a12e61b6549618b46a70d7
--- /dev/null
+++ b/build/torch28-cxx11-cu126-x86_64-linux/__init__.py
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
diff --git a/build/torch28-cxx11-cu126-x86_64-linux/_ops.py b/build/torch28-cxx11-cu126-x86_64-linux/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..be717c1c4741c58826e26a2990d6ce319687e70a
--- /dev/null
+++ b/build/torch28-cxx11-cu126-x86_64-linux/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_605c008
+ops = torch.ops._quantization_eetq_605c008
+
+def add_op_namespace_prefix(op_name: str):
+    """Return ``op_name`` qualified with this build's op namespace."""
+    # The namespace matches the compiled extension module imported above.
+    qualified = f"_quantization_eetq_605c008::{op_name}"
+    return qualified
\ No newline at end of file
diff --git a/build/torch28-cxx11-cu126-x86_64-linux/_quantization_eetq_605c008.abi3.so b/build/torch28-cxx11-cu126-x86_64-linux/_quantization_eetq_605c008.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..e6c86ed5f1652619b25d2499f8fc27bd4456aeaf
--- /dev/null
+++ b/build/torch28-cxx11-cu126-x86_64-linux/_quantization_eetq_605c008.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11e5e3b0a58f1f91bc5809e248100d391480e24f39e5104f56acb3e70d1894bc
+size 39045544
diff --git a/build/torch28-cxx11-cu126-x86_64-linux/custom_ops.py b/build/torch28-cxx11-cu126-x86_64-linux/custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..005b5a6e3cd5f7bcfd4aa5d7d80d60a5ed9fab88
--- /dev/null
+++ b/build/torch28-cxx11-cu126-x86_64-linux/custom_ops.py
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(input: torch.Tensor, weight: torch.Tensor,
+                scale: torch.Tensor) -> torch.Tensor:
+    """Run the extension's ``w8_a16_gemm`` op on the three tensors and return its result."""
+    return ops.w8_a16_gemm(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor,
+    output: torch.Tensor, m: int, n: int, k: int,
+) -> torch.Tensor:
+    """Call the extension's ``w8_a16_gemm_`` op with a caller-supplied ``output``.
+
+    NOTE(review): presumably the op writes into ``output`` and ``m``/``n``/``k``
+    are the GEMM dimensions -- confirm against the kernel source.
+    """
+    return ops.w8_a16_gemm_(input, weight, scale, output, m, n, k)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)  # forwarded unchanged to the extension op
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    """Call the extension's ``quant_weights`` op and return the tensors it yields."""
+    op_args = (origin_weight, quant_type, return_unprocessed_quantized_tensor)
+    return ops.quant_weights(*op_args)
diff --git a/build/torch28-cxx11-cu126-x86_64-linux/metadata.json b/build/torch28-cxx11-cu126-x86_64-linux/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..9cf5deed9898dce769f4cc73913d3530b92a0bd8
--- /dev/null
+++ b/build/torch28-cxx11-cu126-x86_64-linux/metadata.json
@@ -0,0 +1,4 @@
+{
+  "version": 1,
+  "python-depends": []
+}
\ No newline at end of file
diff --git a/build/torch28-cxx11-cu126-x86_64-linux/quantization_eetq/__init__.py b/build/torch28-cxx11-cu126-x86_64-linux/quantization_eetq/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..03dbc1afe1cf156661a2b1b22003cd5f599a0309
--- /dev/null
+++ b/build/torch28-cxx11-cu126-x86_64-linux/quantization_eetq/__init__.py
@@ -0,0 +1,26 @@
+import ctypes
+import sys
+
+import importlib
+from pathlib import Path
+from types import ModuleType
+
+def _import_from_path(file_path: Path) -> ModuleType:
+    """Load ``file_path`` as a module registered under a path-derived name."""
+    # `import importlib` alone does not guarantee the `util` submodule is bound.
+    import importlib.util
+
+    # The file's own module name cannot be reused: once in `sys.modules` it
+    # would satisfy unrelated imports, so the name is the hex-encoded hash of
+    # the absolute path instead.
+    module_name = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if spec is None or spec.loader is None:
+        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
+    module = importlib.util.module_from_spec(spec)
+    sys.modules[module_name] = module
+    spec.loader.exec_module(module)
+    return module
+
+
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
diff --git a/build/torch28-cxx11-cu128-aarch64-linux/quantization_eetq/__init__.py b/build/torch28-cxx11-cu128-aarch64-linux/quantization_eetq/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c65d0601c655d7acf1a12e61b6549618b46a70d7
--- /dev/null
+++ b/build/torch28-cxx11-cu128-aarch64-linux/quantization_eetq/__init__.py
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
diff --git a/build/torch28-cxx11-cu128-aarch64-linux/quantization_eetq/__pycache__/__init__.cpython-313.pyc b/build/torch28-cxx11-cu128-aarch64-linux/quantization_eetq/__pycache__/__init__.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..59674a5420a3fe2b393a7ec87f05be4d7809c6d5
Binary files /dev/null and b/build/torch28-cxx11-cu128-aarch64-linux/quantization_eetq/__pycache__/__init__.cpython-313.pyc differ
diff --git a/build/torch28-cxx11-cu128-aarch64-linux/quantization_eetq/__pycache__/_ops.cpython-313.pyc b/build/torch28-cxx11-cu128-aarch64-linux/quantization_eetq/__pycache__/_ops.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ff9f7cd5f81606282c1528f14c955cf3309ed08d
Binary files /dev/null and b/build/torch28-cxx11-cu128-aarch64-linux/quantization_eetq/__pycache__/_ops.cpython-313.pyc differ
diff --git a/build/torch28-cxx11-cu128-aarch64-linux/quantization_eetq/__pycache__/custom_ops.cpython-313.pyc b/build/torch28-cxx11-cu128-aarch64-linux/quantization_eetq/__pycache__/custom_ops.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3a9bb1278b5e3db274c5947b45d4b992cb59bca4
Binary files /dev/null and b/build/torch28-cxx11-cu128-aarch64-linux/quantization_eetq/__pycache__/custom_ops.cpython-313.pyc differ
diff --git a/build/torch28-cxx11-cu128-aarch64-linux/quantization_eetq/_ops.py b/build/torch28-cxx11-cu128-aarch64-linux/quantization_eetq/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..ca4f55199f0790796ac528270956a3046014573c
--- /dev/null
+++ b/build/torch28-cxx11-cu128-aarch64-linux/quantization_eetq/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_1d4b892
+ops = torch.ops._quantization_eetq_1d4b892
+
+def add_op_namespace_prefix(op_name: str):
+    """Return ``op_name`` qualified with this build's op namespace."""
+    # The result has the form "<namespace>::<op_name>".
+    qualified_name = f"_quantization_eetq_1d4b892::{op_name}"
+    return qualified_name
\ No newline at end of file
diff --git a/build/torch28-cxx11-cu128-aarch64-linux/quantization_eetq/_quantization_eetq_1d4b892.abi3.so b/build/torch28-cxx11-cu128-aarch64-linux/quantization_eetq/_quantization_eetq_1d4b892.abi3.so
new file mode 100755
index 0000000000000000000000000000000000000000..f588d976a3b0007d1887c984d706afa0fcec5a1b
--- /dev/null
+++ b/build/torch28-cxx11-cu128-aarch64-linux/quantization_eetq/_quantization_eetq_1d4b892.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d538ef151884e17e07c471c397a6645a0dbfbd66d7ae4b299a4c8091f41a55e9
+size 37432240
diff --git a/build/torch28-cxx11-cu128-aarch64-linux/quantization_eetq/custom_ops.py b/build/torch28-cxx11-cu128-aarch64-linux/quantization_eetq/custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..005b5a6e3cd5f7bcfd4aa5d7d80d60a5ed9fab88
--- /dev/null
+++ b/build/torch28-cxx11-cu128-aarch64-linux/quantization_eetq/custom_ops.py
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor) -> torch.Tensor:
+    """Forward ``input``, ``weight`` and ``scale``, unchanged, to ``ops.w8_a16_gemm``."""
+    gemm_result = ops.w8_a16_gemm(input, weight, scale)
+    return gemm_result
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor,
+    output: torch.Tensor, m: int, n: int, k: int,
+) -> torch.Tensor:
+    """Call ``ops.w8_a16_gemm_`` and return whatever it returns.
+
+    All seven arguments are passed through positionally and unchanged.
+    """
+    gemm_args = (input, weight, scale, output, m, n, k)
+    return ops.w8_a16_gemm_(*gemm_args)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)  # pass-through: both arguments go unchanged to the op on `ops`
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    """Forward all three arguments, unchanged, to ``ops.quant_weights``."""
+    op_args = (origin_weight, quant_type, return_unprocessed_quantized_tensor)
+    return ops.quant_weights(*op_args)
diff --git a/build/torch28-cxx11-cu128-x86_64-linux/__init__.py b/build/torch28-cxx11-cu128-x86_64-linux/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c65d0601c655d7acf1a12e61b6549618b46a70d7
--- /dev/null
+++ b/build/torch28-cxx11-cu128-x86_64-linux/__init__.py
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
diff --git a/build/torch28-cxx11-cu128-x86_64-linux/_ops.py b/build/torch28-cxx11-cu128-x86_64-linux/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..be717c1c4741c58826e26a2990d6ce319687e70a
--- /dev/null
+++ b/build/torch28-cxx11-cu128-x86_64-linux/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_605c008
+ops = torch.ops._quantization_eetq_605c008
+
+def add_op_namespace_prefix(op_name: str):
+    """Return ``op_name`` qualified with this build's op namespace."""
+    # The result has the form "<namespace>::<op_name>".
+    qualified_name = f"_quantization_eetq_605c008::{op_name}"
+    return qualified_name
\ No newline at end of file
diff --git a/build/torch28-cxx11-cu128-x86_64-linux/_quantization_eetq_605c008.abi3.so b/build/torch28-cxx11-cu128-x86_64-linux/_quantization_eetq_605c008.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..ee70dfb44c87d58a8a52f7983256814873a82f59
--- /dev/null
+++ b/build/torch28-cxx11-cu128-x86_64-linux/_quantization_eetq_605c008.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a6c0ec3bf231c79df6cac872280b7d4fb4fbb0fa563784e00b83f1aa5be3825a
+size 45378576
diff --git a/build/torch28-cxx11-cu128-x86_64-linux/custom_ops.py b/build/torch28-cxx11-cu128-x86_64-linux/custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..005b5a6e3cd5f7bcfd4aa5d7d80d60a5ed9fab88
--- /dev/null
+++ b/build/torch28-cxx11-cu128-x86_64-linux/custom_ops.py
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor) -> torch.Tensor:
+    """Forward ``input``, ``weight`` and ``scale``, unchanged, to ``ops.w8_a16_gemm``."""
+    gemm_result = ops.w8_a16_gemm(input, weight, scale)
+    return gemm_result
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor,
+    output: torch.Tensor, m: int, n: int, k: int,
+) -> torch.Tensor:
+    """Call ``ops.w8_a16_gemm_`` and return whatever it returns.
+
+    All seven arguments are passed through positionally and unchanged.
+    """
+    gemm_args = (input, weight, scale, output, m, n, k)
+    return ops.w8_a16_gemm_(*gemm_args)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)  # pass-through: both arguments go unchanged to the op on `ops`
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    """Forward all three arguments, unchanged, to ``ops.quant_weights``."""
+    op_args = (origin_weight, quant_type, return_unprocessed_quantized_tensor)
+    return ops.quant_weights(*op_args)
diff --git a/build/torch28-cxx11-cu128-x86_64-linux/metadata.json b/build/torch28-cxx11-cu128-x86_64-linux/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..9cf5deed9898dce769f4cc73913d3530b92a0bd8
--- /dev/null
+++ b/build/torch28-cxx11-cu128-x86_64-linux/metadata.json
@@ -0,0 +1,4 @@
+{
+  "version": 1,
+  "python-depends": []
+}
\ No newline at end of file
diff --git a/build/torch28-cxx11-cu128-x86_64-linux/quantization_eetq/__init__.py b/build/torch28-cxx11-cu128-x86_64-linux/quantization_eetq/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..03dbc1afe1cf156661a2b1b22003cd5f599a0309
--- /dev/null
+++ b/build/torch28-cxx11-cu128-x86_64-linux/quantization_eetq/__init__.py
@@ -0,0 +1,26 @@
+import ctypes
+import sys
+
+import importlib
+from pathlib import Path
+from types import ModuleType
+
+def _import_from_path(file_path: Path) -> ModuleType:
+    """Execute the Python file at ``file_path`` and return it as a module.
+
+    The module is registered in ``sys.modules`` under the hex-encoded hash of
+    its absolute path, so the name cannot shadow other imports. Raises
+    ``ImportError`` when no spec or loader can be built for the file.
+    """
+    import importlib.util  # `import importlib` alone need not load `.util`
+    module_name = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if spec is None or spec.loader is None:
+        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
+    module = importlib.util.module_from_spec(spec)
+    sys.modules[module_name] = module
+    spec.loader.exec_module(module)
+    return module
+
+
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
diff --git a/build/torch28-cxx11-cu129-aarch64-linux/quantization_eetq/__init__.py b/build/torch28-cxx11-cu129-aarch64-linux/quantization_eetq/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c65d0601c655d7acf1a12e61b6549618b46a70d7
--- /dev/null
+++ b/build/torch28-cxx11-cu129-aarch64-linux/quantization_eetq/__init__.py
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
diff --git a/build/torch28-cxx11-cu129-aarch64-linux/quantization_eetq/__pycache__/__init__.cpython-313.pyc b/build/torch28-cxx11-cu129-aarch64-linux/quantization_eetq/__pycache__/__init__.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..cd55f62f9ecdac66048d1b51750411c2e1f4d484
Binary files /dev/null and b/build/torch28-cxx11-cu129-aarch64-linux/quantization_eetq/__pycache__/__init__.cpython-313.pyc differ
diff --git a/build/torch28-cxx11-cu129-aarch64-linux/quantization_eetq/__pycache__/_ops.cpython-313.pyc b/build/torch28-cxx11-cu129-aarch64-linux/quantization_eetq/__pycache__/_ops.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d1bd2e02a4d62092f4e1d7487445734283a8b418
Binary files /dev/null and b/build/torch28-cxx11-cu129-aarch64-linux/quantization_eetq/__pycache__/_ops.cpython-313.pyc differ
diff --git a/build/torch28-cxx11-cu129-aarch64-linux/quantization_eetq/__pycache__/custom_ops.cpython-313.pyc b/build/torch28-cxx11-cu129-aarch64-linux/quantization_eetq/__pycache__/custom_ops.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3fb4e784cbd1f7ffa6e40def01fc3f014205913d
Binary files /dev/null and b/build/torch28-cxx11-cu129-aarch64-linux/quantization_eetq/__pycache__/custom_ops.cpython-313.pyc differ
diff --git a/build/torch28-cxx11-cu129-aarch64-linux/quantization_eetq/_ops.py b/build/torch28-cxx11-cu129-aarch64-linux/quantization_eetq/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..36f39b4d4fa79b4a8a6bbf9181cd71831f70a633
--- /dev/null
+++ b/build/torch28-cxx11-cu129-aarch64-linux/quantization_eetq/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_ee9ac42
+ops = torch.ops._quantization_eetq_ee9ac42
+
+def add_op_namespace_prefix(op_name: str):
+    """Return ``op_name`` qualified with this build's op namespace."""
+    # The result has the form "<namespace>::<op_name>".
+    qualified_name = f"_quantization_eetq_ee9ac42::{op_name}"
+    return qualified_name
\ No newline at end of file
diff --git a/build/torch28-cxx11-cu129-aarch64-linux/quantization_eetq/_quantization_eetq_ee9ac42.abi3.so b/build/torch28-cxx11-cu129-aarch64-linux/quantization_eetq/_quantization_eetq_ee9ac42.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..0df48b6ab7fbab2946d74a7fd630348e668a1372
--- /dev/null
+++ b/build/torch28-cxx11-cu129-aarch64-linux/quantization_eetq/_quantization_eetq_ee9ac42.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c3c53ac387f693a12937e3c6d8640832470cd801117269fcbc2f6a1a83382612
+size 38743552
diff --git a/build/torch28-cxx11-cu129-aarch64-linux/quantization_eetq/custom_ops.py b/build/torch28-cxx11-cu129-aarch64-linux/quantization_eetq/custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..005b5a6e3cd5f7bcfd4aa5d7d80d60a5ed9fab88
--- /dev/null
+++ b/build/torch28-cxx11-cu129-aarch64-linux/quantization_eetq/custom_ops.py
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor) -> torch.Tensor:
+    """Forward ``input``, ``weight`` and ``scale``, unchanged, to ``ops.w8_a16_gemm``."""
+    gemm_result = ops.w8_a16_gemm(input, weight, scale)
+    return gemm_result
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor,
+    output: torch.Tensor, m: int, n: int, k: int,
+) -> torch.Tensor:
+    """Call ``ops.w8_a16_gemm_`` and return whatever it returns.
+
+    All seven arguments are passed through positionally and unchanged.
+    """
+    gemm_args = (input, weight, scale, output, m, n, k)
+    return ops.w8_a16_gemm_(*gemm_args)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)  # pass-through: both arguments go unchanged to the op on `ops`
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    """Forward all three arguments, unchanged, to ``ops.quant_weights``."""
+    op_args = (origin_weight, quant_type, return_unprocessed_quantized_tensor)
+    return ops.quant_weights(*op_args)
diff --git a/build/torch28-cxx11-cu129-x86_64-linux/__init__.py b/build/torch28-cxx11-cu129-x86_64-linux/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c65d0601c655d7acf1a12e61b6549618b46a70d7
--- /dev/null
+++ b/build/torch28-cxx11-cu129-x86_64-linux/__init__.py
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
diff --git a/build/torch28-cxx11-cu129-x86_64-linux/_ops.py b/build/torch28-cxx11-cu129-x86_64-linux/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..be717c1c4741c58826e26a2990d6ce319687e70a
--- /dev/null
+++ b/build/torch28-cxx11-cu129-x86_64-linux/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_605c008
+ops = torch.ops._quantization_eetq_605c008
+
+def add_op_namespace_prefix(op_name: str):
+    """Return ``op_name`` qualified with this build's op namespace."""
+    # The result has the form "<namespace>::<op_name>".
+    qualified_name = f"_quantization_eetq_605c008::{op_name}"
+    return qualified_name
\ No newline at end of file
diff --git a/build/torch28-cxx11-cu129-x86_64-linux/_quantization_eetq_605c008.abi3.so b/build/torch28-cxx11-cu129-x86_64-linux/_quantization_eetq_605c008.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..79a684b1366a17beb76407c8f2e86766a90164e8
--- /dev/null
+++ b/build/torch28-cxx11-cu129-x86_64-linux/_quantization_eetq_605c008.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5c671d2cd844ed9703744e1a337c901ef7e6265f70ef1cb28cc64a4a06df81f
+size 46492944
diff --git a/build/torch28-cxx11-cu129-x86_64-linux/custom_ops.py b/build/torch28-cxx11-cu129-x86_64-linux/custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..005b5a6e3cd5f7bcfd4aa5d7d80d60a5ed9fab88
--- /dev/null
+++ b/build/torch28-cxx11-cu129-x86_64-linux/custom_ops.py
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor) -> torch.Tensor:
+    """Forward ``input``, ``weight`` and ``scale``, unchanged, to ``ops.w8_a16_gemm``."""
+    gemm_result = ops.w8_a16_gemm(input, weight, scale)
+    return gemm_result
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor,
+    output: torch.Tensor, m: int, n: int, k: int,
+) -> torch.Tensor:
+    """Call ``ops.w8_a16_gemm_`` and return whatever it returns.
+
+    All seven arguments are passed through positionally and unchanged.
+    """
+    gemm_args = (input, weight, scale, output, m, n, k)
+    return ops.w8_a16_gemm_(*gemm_args)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)  # pass-through: both arguments go unchanged to the op on `ops`
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    """Forward all three arguments, unchanged, to ``ops.quant_weights``."""
+    op_args = (origin_weight, quant_type, return_unprocessed_quantized_tensor)
+    return ops.quant_weights(*op_args)
diff --git a/build/torch28-cxx11-cu129-x86_64-linux/metadata.json b/build/torch28-cxx11-cu129-x86_64-linux/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..9cf5deed9898dce769f4cc73913d3530b92a0bd8
--- /dev/null
+++ b/build/torch28-cxx11-cu129-x86_64-linux/metadata.json
@@ -0,0 +1,4 @@
+{
+  "version": 1,
+  "python-depends": []
+}
\ No newline at end of file
diff --git a/build/torch28-cxx11-cu129-x86_64-linux/quantization_eetq/__init__.py b/build/torch28-cxx11-cu129-x86_64-linux/quantization_eetq/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..03dbc1afe1cf156661a2b1b22003cd5f599a0309
--- /dev/null
+++ b/build/torch28-cxx11-cu129-x86_64-linux/quantization_eetq/__init__.py
@@ -0,0 +1,26 @@
+import ctypes
+import sys
+
+import importlib
+from pathlib import Path
+from types import ModuleType
+
+def _import_from_path(file_path: Path) -> ModuleType:
+    """Execute the Python file at ``file_path`` and return it as a module.
+
+    The module is registered in ``sys.modules`` under the hex-encoded hash of
+    its absolute path, so the name cannot shadow other imports. Raises
+    ``ImportError`` when no spec or loader can be built for the file.
+    """
+    import importlib.util  # `import importlib` alone need not load `.util`
+    module_name = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if spec is None or spec.loader is None:
+        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
+    module = importlib.util.module_from_spec(spec)
+    sys.modules[module_name] = module
+    spec.loader.exec_module(module)
+    return module
+
+
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
diff --git a/build/torch29-cxx11-cu126-aarch64-linux/__init__.py b/build/torch29-cxx11-cu126-aarch64-linux/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c65d0601c655d7acf1a12e61b6549618b46a70d7
--- /dev/null
+++ b/build/torch29-cxx11-cu126-aarch64-linux/__init__.py
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
diff --git a/build/torch29-cxx11-cu126-aarch64-linux/_ops.py b/build/torch29-cxx11-cu126-aarch64-linux/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..52f2ce3367ecf7eb86267cd1d358ae9cff984cdb
--- /dev/null
+++ b/build/torch29-cxx11-cu126-aarch64-linux/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_cuda_07c987f
+ops = torch.ops._quantization_eetq_cuda_07c987f
+
+def add_op_namespace_prefix(op_name: str):
+    """Return ``op_name`` qualified with this build's op namespace."""
+    # The result has the form "<namespace>::<op_name>".
+    qualified_name = f"_quantization_eetq_cuda_07c987f::{op_name}"
+    return qualified_name
diff --git a/build/torch29-cxx11-cu126-aarch64-linux/_quantization_eetq_cuda_07c987f.abi3.so b/build/torch29-cxx11-cu126-aarch64-linux/_quantization_eetq_cuda_07c987f.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..e80ee1b31ed76215ebbb2c6f607386dfc2e49c37
--- /dev/null
+++ b/build/torch29-cxx11-cu126-aarch64-linux/_quantization_eetq_cuda_07c987f.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e0dc7862071e70892ff753e2c3b90f40e9f67f698d585b5be9fca0dc1e8d92fb
+size 39006704
diff --git a/build/torch29-cxx11-cu126-aarch64-linux/custom_ops.py b/build/torch29-cxx11-cu126-aarch64-linux/custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..005b5a6e3cd5f7bcfd4aa5d7d80d60a5ed9fab88
--- /dev/null
+++ b/build/torch29-cxx11-cu126-aarch64-linux/custom_ops.py
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor) -> torch.Tensor:
+    """Forward ``input``, ``weight`` and ``scale``, unchanged, to ``ops.w8_a16_gemm``."""
+    gemm_result = ops.w8_a16_gemm(input, weight, scale)
+    return gemm_result
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor,
+    output: torch.Tensor, m: int, n: int, k: int,
+) -> torch.Tensor:
+    """Call ``ops.w8_a16_gemm_`` and return whatever it returns.
+
+    All seven arguments are passed through positionally and unchanged.
+    """
+    gemm_args = (input, weight, scale, output, m, n, k)
+    return ops.w8_a16_gemm_(*gemm_args)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)  # pass-through: both arguments go unchanged to the op on `ops`
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    """Forward all three arguments, unchanged, to ``ops.quant_weights``."""
+    op_args = (origin_weight, quant_type, return_unprocessed_quantized_tensor)
+    return ops.quant_weights(*op_args)
diff --git a/build/torch29-cxx11-cu126-aarch64-linux/metadata.json b/build/torch29-cxx11-cu126-aarch64-linux/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..f5902b55ab0b2b561c0cf97567c9806c60839c7f
--- /dev/null
+++ b/build/torch29-cxx11-cu126-aarch64-linux/metadata.json
@@ -0,0 +1,18 @@
+{
+  "version": 1,
+  "license": "Apache-2.0",
+  "python-depends": [],
+  "backend": {
+    "type": "cuda",
+    "archs": [
+      "7.0",
+      "7.2",
+      "7.5",
+      "8.0",
+      "8.6",
+      "8.7",
+      "8.9",
+      "9.0+PTX"
+    ]
+  }
+}
diff --git a/build/torch29-cxx11-cu126-aarch64-linux/quantization_eetq/__init__.py b/build/torch29-cxx11-cu126-aarch64-linux/quantization_eetq/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..03dbc1afe1cf156661a2b1b22003cd5f599a0309
--- /dev/null
+++ b/build/torch29-cxx11-cu126-aarch64-linux/quantization_eetq/__init__.py
@@ -0,0 +1,26 @@
+import ctypes
+import sys
+
+import importlib
+from pathlib import Path
+from types import ModuleType
+
+def _import_from_path(file_path: Path) -> ModuleType:
+    """Execute the Python file at ``file_path`` and return it as a module.
+
+    The module is registered in ``sys.modules`` under the hex-encoded hash of
+    its absolute path, so the name cannot shadow other imports. Raises
+    ``ImportError`` when no spec or loader can be built for the file.
+    """
+    import importlib.util  # `import importlib` alone need not load `.util`
+    module_name = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if spec is None or spec.loader is None:
+        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
+    module = importlib.util.module_from_spec(spec)
+    sys.modules[module_name] = module
+    spec.loader.exec_module(module)
+    return module
+
+
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
diff --git a/build/torch29-cxx11-cu126-x86_64-linux/__init__.py b/build/torch29-cxx11-cu126-x86_64-linux/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c65d0601c655d7acf1a12e61b6549618b46a70d7
--- /dev/null
+++ b/build/torch29-cxx11-cu126-x86_64-linux/__init__.py
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
diff --git a/build/torch29-cxx11-cu126-x86_64-linux/_ops.py b/build/torch29-cxx11-cu126-x86_64-linux/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..52f2ce3367ecf7eb86267cd1d358ae9cff984cdb
--- /dev/null
+++ b/build/torch29-cxx11-cu126-x86_64-linux/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_cuda_07c987f
+ops = torch.ops._quantization_eetq_cuda_07c987f
+
+def add_op_namespace_prefix(op_name: str):
+    """Return ``op_name`` qualified with this build's op namespace."""
+    # The result has the form "<namespace>::<op_name>".
+    qualified_name = f"_quantization_eetq_cuda_07c987f::{op_name}"
+    return qualified_name
diff --git a/build/torch29-cxx11-cu126-x86_64-linux/_quantization_eetq_cuda_07c987f.abi3.so b/build/torch29-cxx11-cu126-x86_64-linux/_quantization_eetq_cuda_07c987f.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..a44a834da02a835ca27cdf3b71530fa11aeeac48
--- /dev/null
+++ b/build/torch29-cxx11-cu126-x86_64-linux/_quantization_eetq_cuda_07c987f.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4bc9529940b02d24b5d8ea454e2a459fbf2f17e3db6261c30c50145544d27bc4
+size 39049640
diff --git a/build/torch29-cxx11-cu126-x86_64-linux/custom_ops.py b/build/torch29-cxx11-cu126-x86_64-linux/custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..005b5a6e3cd5f7bcfd4aa5d7d80d60a5ed9fab88
--- /dev/null
+++ b/build/torch29-cxx11-cu126-x86_64-linux/custom_ops.py
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor) -> torch.Tensor:
+    """Forward ``input``, ``weight`` and ``scale``, unchanged, to ``ops.w8_a16_gemm``."""
+    gemm_result = ops.w8_a16_gemm(input, weight, scale)
+    return gemm_result
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor,
+    output: torch.Tensor, m: int, n: int, k: int,
+) -> torch.Tensor:
+    """Call ``ops.w8_a16_gemm_`` and return whatever it returns.
+
+    All seven arguments are passed through positionally and unchanged.
+    """
+    gemm_args = (input, weight, scale, output, m, n, k)
+    return ops.w8_a16_gemm_(*gemm_args)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)  # pass-through: both arguments go unchanged to the op on `ops`
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    """Forward all three arguments, unchanged, to ``ops.quant_weights``."""
+    op_args = (origin_weight, quant_type, return_unprocessed_quantized_tensor)
+    return ops.quant_weights(*op_args)
diff --git a/build/torch29-cxx11-cu126-x86_64-linux/metadata.json b/build/torch29-cxx11-cu126-x86_64-linux/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..f5902b55ab0b2b561c0cf97567c9806c60839c7f
--- /dev/null
+++ b/build/torch29-cxx11-cu126-x86_64-linux/metadata.json
@@ -0,0 +1,18 @@
+{
+  "version": 1,
+  "license": "Apache-2.0",
+  "python-depends": [],
+  "backend": {
+    "type": "cuda",
+    "archs": [
+      "7.0",
+      "7.2",
+      "7.5",
+      "8.0",
+      "8.6",
+      "8.7",
+      "8.9",
+      "9.0+PTX"
+    ]
+  }
+}
diff --git a/build/torch29-cxx11-cu126-x86_64-linux/quantization_eetq/__init__.py b/build/torch29-cxx11-cu126-x86_64-linux/quantization_eetq/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..03dbc1afe1cf156661a2b1b22003cd5f599a0309
--- /dev/null
+++ b/build/torch29-cxx11-cu126-x86_64-linux/quantization_eetq/__init__.py
@@ -0,0 +1,26 @@
+import ctypes
+import sys
+
+import importlib
+from pathlib import Path
+from types import ModuleType
+
+def _import_from_path(file_path: Path) -> ModuleType:
+    """Execute the Python file at ``file_path`` and return it as a module.
+
+    The module is registered in ``sys.modules`` under the hex-encoded hash of
+    its absolute path, so the name cannot shadow other imports. Raises
+    ``ImportError`` when no spec or loader can be built for the file.
+    """
+    import importlib.util  # `import importlib` alone need not load `.util`
+    module_name = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if spec is None or spec.loader is None:
+        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
+    module = importlib.util.module_from_spec(spec)
+    sys.modules[module_name] = module
+    spec.loader.exec_module(module)
+    return module
+
+
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
diff --git a/build/torch29-cxx11-cu128-aarch64-linux/__init__.py b/build/torch29-cxx11-cu128-aarch64-linux/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c65d0601c655d7acf1a12e61b6549618b46a70d7
--- /dev/null
+++ b/build/torch29-cxx11-cu128-aarch64-linux/__init__.py
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
diff --git a/build/torch29-cxx11-cu128-aarch64-linux/_ops.py b/build/torch29-cxx11-cu128-aarch64-linux/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..52f2ce3367ecf7eb86267cd1d358ae9cff984cdb
--- /dev/null
+++ b/build/torch29-cxx11-cu128-aarch64-linux/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_cuda_07c987f
+ops = torch.ops._quantization_eetq_cuda_07c987f
+
+def add_op_namespace_prefix(op_name: str):
+    """Return ``op_name`` qualified with this build's op namespace."""
+    # The result has the form "<namespace>::<op_name>".
+    qualified_name = f"_quantization_eetq_cuda_07c987f::{op_name}"
+    return qualified_name
diff --git a/build/torch29-cxx11-cu128-aarch64-linux/_quantization_eetq_cuda_07c987f.abi3.so b/build/torch29-cxx11-cu128-aarch64-linux/_quantization_eetq_cuda_07c987f.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..2bd91fdcf72f4acbe5c1f04527cae37a9b317487
--- /dev/null
+++ b/build/torch29-cxx11-cu128-aarch64-linux/_quantization_eetq_cuda_07c987f.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5fe7e9d12743a281594a2bcc46d8592a8f1bc6d74c0e51bb1c85d04a67e5001c
+size 45296736
diff --git a/build/torch29-cxx11-cu128-aarch64-linux/custom_ops.py b/build/torch29-cxx11-cu128-aarch64-linux/custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..005b5a6e3cd5f7bcfd4aa5d7d80d60a5ed9fab88
--- /dev/null
+++ b/build/torch29-cxx11-cu128-aarch64-linux/custom_ops.py
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor) -> torch.Tensor:
+    """Forward ``input``, ``weight`` and ``scale``, unchanged, to ``ops.w8_a16_gemm``."""
+    gemm_result = ops.w8_a16_gemm(input, weight, scale)
+    return gemm_result
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor,
+    output: torch.Tensor, m: int, n: int, k: int,
+) -> torch.Tensor:
+    """Call ``ops.w8_a16_gemm_`` and return whatever it returns.
+
+    All seven arguments are passed through positionally and unchanged.
+    """
+    gemm_args = (input, weight, scale, output, m, n, k)
+    return ops.w8_a16_gemm_(*gemm_args)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)  # pass-through: both arguments go unchanged to the op on `ops`
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    """Forward all three arguments, unchanged, to ``ops.quant_weights``."""
+    op_args = (origin_weight, quant_type, return_unprocessed_quantized_tensor)
+    return ops.quant_weights(*op_args)
diff --git a/build/torch29-cxx11-cu128-aarch64-linux/metadata.json b/build/torch29-cxx11-cu128-aarch64-linux/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..8b796af185fbbd8594fcd846949aa5fadc0ccdda
--- /dev/null
+++ b/build/torch29-cxx11-cu128-aarch64-linux/metadata.json
@@ -0,0 +1,21 @@
+{
+  "version": 1,
+  "license": "Apache-2.0",
+  "python-depends": [],
+  "backend": {
+    "type": "cuda",
+    "archs": [
+      "10.0",
+      "10.1",
+      "12.0+PTX",
+      "7.0",
+      "7.2",
+      "7.5",
+      "8.0",
+      "8.6",
+      "8.7",
+      "8.9",
+      "9.0"
+    ]
+  }
+}
diff --git a/build/torch29-cxx11-cu128-aarch64-linux/quantization_eetq/__init__.py b/build/torch29-cxx11-cu128-aarch64-linux/quantization_eetq/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..03dbc1afe1cf156661a2b1b22003cd5f599a0309
--- /dev/null
+++ b/build/torch29-cxx11-cu128-aarch64-linux/quantization_eetq/__init__.py
@@ -0,0 +1,26 @@
+import ctypes
+import sys
+
+import importlib
+from pathlib import Path
+from types import ModuleType
+
+def _import_from_path(file_path: Path) -> ModuleType:
+    """Execute the Python file at ``file_path`` and return it as a module.
+
+    The module is registered in ``sys.modules`` under the hex-encoded hash of
+    its absolute path, so the name cannot shadow other imports. Raises
+    ``ImportError`` when no spec or loader can be built for the file.
+    """
+    import importlib.util  # `import importlib` alone need not load `.util`
+    module_name = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    if spec is None or spec.loader is None:
+        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
+    module = importlib.util.module_from_spec(spec)
+    sys.modules[module_name] = module
+    spec.loader.exec_module(module)
+    return module
+
+
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
diff --git a/build/torch29-cxx11-cu128-x86_64-linux/__init__.py b/build/torch29-cxx11-cu128-x86_64-linux/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c65d0601c655d7acf1a12e61b6549618b46a70d7
--- /dev/null
+++ b/build/torch29-cxx11-cu128-x86_64-linux/__init__.py
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
diff --git a/build/torch29-cxx11-cu128-x86_64-linux/_ops.py b/build/torch29-cxx11-cu128-x86_64-linux/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..52f2ce3367ecf7eb86267cd1d358ae9cff984cdb
--- /dev/null
+++ b/build/torch29-cxx11-cu128-x86_64-linux/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_cuda_07c987f
+ops = torch.ops._quantization_eetq_cuda_07c987f
+
+def add_op_namespace_prefix(op_name: str):
+    """Return ``op_name`` qualified with this build's op namespace."""
+    # The result has the form "<namespace>::<op_name>".
+    qualified_name = f"_quantization_eetq_cuda_07c987f::{op_name}"
+    return qualified_name
diff --git a/build/torch29-cxx11-cu128-x86_64-linux/_quantization_eetq_cuda_07c987f.abi3.so b/build/torch29-cxx11-cu128-x86_64-linux/_quantization_eetq_cuda_07c987f.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..01fcff319d05e27a67d6e40d2ec4a74f531ebd4e
--- /dev/null
+++ b/build/torch29-cxx11-cu128-x86_64-linux/_quantization_eetq_cuda_07c987f.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a7b185f90016c91944fbe70d6907ea4597e41f7be5777e57a01d11cbb4922478
+size 45378760
diff --git a/build/torch29-cxx11-cu128-x86_64-linux/custom_ops.py b/build/torch29-cxx11-cu128-x86_64-linux/custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..005b5a6e3cd5f7bcfd4aa5d7d80d60a5ed9fab88
--- /dev/null
+++ b/build/torch29-cxx11-cu128-x86_64-linux/custom_ops.py
@@ -0,0 +1,61 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    scale: torch.Tensor,
+) -> torch.Tensor:
+    """Forward the arguments to the extension's ``w8_a16_gemm`` op.
+
+    NOTE(review): the name suggests a GEMM with 8-bit weights and 16-bit
+    activations; the expected shapes and dtypes are set by the compiled
+    kernel, not by this wrapper -- confirm there.
+    """
+    result = ops.w8_a16_gemm(input, weight, scale)
+    return result
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    scale: torch.Tensor,
+    output: torch.Tensor,
+    m: int,
+    n: int,
+    k: int,
+) -> torch.Tensor:
+    """Forward the arguments to the extension's ``w8_a16_gemm_`` op.
+
+    NOTE(review): the trailing underscore and the ``output`` argument suggest
+    the result is written into ``output``, with ``m``/``n``/``k`` as the GEMM
+    dimensions -- not visible from this wrapper, confirm against the kernel.
+    """
+    gemm_args = (input, weight, scale, output, m, n, k)
+    return ops.w8_a16_gemm_(*gemm_args)
+
+
+def preprocess_weights(
+    origin_weight: torch.Tensor,
+    is_int4: bool,
+) -> torch.Tensor:
+    """Forward the arguments to the extension's ``preprocess_weights`` op."""
+    processed = ops.preprocess_weights(origin_weight, is_int4)
+    return processed
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    """Forward the arguments to the extension's ``quant_weights`` op.
+
+    Returns whatever the op returns; the annotation declares a list of
+    tensors.
+    """
+    op_args = (origin_weight, quant_type, return_unprocessed_quantized_tensor)
+    return ops.quant_weights(*op_args)
diff --git a/build/torch29-cxx11-cu128-x86_64-linux/metadata.json b/build/torch29-cxx11-cu128-x86_64-linux/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..8b796af185fbbd8594fcd846949aa5fadc0ccdda
--- /dev/null
+++ b/build/torch29-cxx11-cu128-x86_64-linux/metadata.json
@@ -0,0 +1,21 @@
+{
+  "version": 1,
+  "license": "Apache-2.0",
+  "python-depends": [],
+  "backend": {
+    "type": "cuda",
+    "archs": [
+      "10.0",
+      "10.1",
+      "12.0+PTX",
+      "7.0",
+      "7.2",
+      "7.5",
+      "8.0",
+      "8.6",
+      "8.7",
+      "8.9",
+      "9.0"
+    ]
+  }
+}
diff --git a/build/torch29-cxx11-cu128-x86_64-linux/quantization_eetq/__init__.py b/build/torch29-cxx11-cu128-x86_64-linux/quantization_eetq/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..03dbc1afe1cf156661a2b1b22003cd5f599a0309
--- /dev/null
+++ b/build/torch29-cxx11-cu128-x86_64-linux/quantization_eetq/__init__.py
@@ -0,0 +1,26 @@
+import ctypes
+import importlib.util
+import sys
+from pathlib import Path
+from types import ModuleType
+
+
+def _import_from_path(file_path: Path) -> ModuleType:
+    """Execute the file at ``file_path`` as a module and return it.
+
+    It is registered in ``sys.modules`` under the hex-encoded hash of its
+    absolute path: a regular name there would be picked up by other imports.
+    """
+    unique_name = f"{ctypes.c_size_t(hash(file_path.absolute())).value:x}"
+    spec = importlib.util.spec_from_file_location(unique_name, file_path)
+    if spec is None:
+        raise ImportError(f"Cannot load spec for {unique_name} from {file_path}")
+    loaded = importlib.util.module_from_spec(spec)
+    if loaded is None:
+        raise ImportError(f"Cannot load module {unique_name} from spec")
+    sys.modules[unique_name] = loaded
+    spec.loader.exec_module(loaded)  # type: ignore
+    return loaded
+
+
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
diff --git a/build/torch29-cxx11-cu129-aarch64-linux/__init__.py b/build/torch29-cxx11-cu129-aarch64-linux/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c65d0601c655d7acf1a12e61b6549618b46a70d7
--- /dev/null
+++ b/build/torch29-cxx11-cu129-aarch64-linux/__init__.py
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
diff --git a/build/torch29-cxx11-cu129-aarch64-linux/_ops.py b/build/torch29-cxx11-cu129-aarch64-linux/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..60ecc447c9f7708830d1cc06986da9d62a31bd96
--- /dev/null
+++ b/build/torch29-cxx11-cu129-aarch64-linux/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_cuda_4bc0051
+ops = torch.ops._quantization_eetq_cuda_4bc0051
+
+def add_op_namespace_prefix(op_name: str) -> str:
+    """
+    Return ``op_name`` prefixed with this build's op namespace and ``::``.
+    """
+    return "{}::{}".format("_quantization_eetq_cuda_4bc0051", op_name)
diff --git a/build/torch29-cxx11-cu129-aarch64-linux/_quantization_eetq_cuda_4bc0051.abi3.so b/build/torch29-cxx11-cu129-aarch64-linux/_quantization_eetq_cuda_4bc0051.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..7aecd2d8a082cb4c7d476ff5525648cc63d28991
--- /dev/null
+++ b/build/torch29-cxx11-cu129-aarch64-linux/_quantization_eetq_cuda_4bc0051.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:014cc9a5b6b656e73adb4f3cfdb236ad6033e3a3d5083f23ccbb4ae51d251f14
+size 46411448
diff --git a/build/torch29-cxx11-cu129-aarch64-linux/custom_ops.py b/build/torch29-cxx11-cu129-aarch64-linux/custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..005b5a6e3cd5f7bcfd4aa5d7d80d60a5ed9fab88
--- /dev/null
+++ b/build/torch29-cxx11-cu129-aarch64-linux/custom_ops.py
@@ -0,0 +1,61 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    scale: torch.Tensor,
+) -> torch.Tensor:
+    """Forward the arguments to the extension's ``w8_a16_gemm`` op.
+
+    NOTE(review): the name suggests a GEMM with 8-bit weights and 16-bit
+    activations; the expected shapes and dtypes are set by the compiled
+    kernel, not by this wrapper -- confirm there.
+    """
+    result = ops.w8_a16_gemm(input, weight, scale)
+    return result
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    scale: torch.Tensor,
+    output: torch.Tensor,
+    m: int,
+    n: int,
+    k: int,
+) -> torch.Tensor:
+    """Forward the arguments to the extension's ``w8_a16_gemm_`` op.
+
+    NOTE(review): the trailing underscore and the ``output`` argument suggest
+    the result is written into ``output``, with ``m``/``n``/``k`` as the GEMM
+    dimensions -- not visible from this wrapper, confirm against the kernel.
+    """
+    gemm_args = (input, weight, scale, output, m, n, k)
+    return ops.w8_a16_gemm_(*gemm_args)
+
+
+def preprocess_weights(
+    origin_weight: torch.Tensor,
+    is_int4: bool,
+) -> torch.Tensor:
+    """Forward the arguments to the extension's ``preprocess_weights`` op."""
+    processed = ops.preprocess_weights(origin_weight, is_int4)
+    return processed
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    """Forward the arguments to the extension's ``quant_weights`` op.
+
+    Returns whatever the op returns; the annotation declares a list of
+    tensors.
+    """
+    op_args = (origin_weight, quant_type, return_unprocessed_quantized_tensor)
+    return ops.quant_weights(*op_args)
diff --git a/build/torch29-cxx11-cu129-aarch64-linux/metadata.json b/build/torch29-cxx11-cu129-aarch64-linux/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..8b796af185fbbd8594fcd846949aa5fadc0ccdda
--- /dev/null
+++ b/build/torch29-cxx11-cu129-aarch64-linux/metadata.json
@@ -0,0 +1,21 @@
+{
+  "version": 1,
+  "license": "Apache-2.0",
+  "python-depends": [],
+  "backend": {
+    "type": "cuda",
+    "archs": [
+      "10.0",
+      "10.1",
+      "12.0+PTX",
+      "7.0",
+      "7.2",
+      "7.5",
+      "8.0",
+      "8.6",
+      "8.7",
+      "8.9",
+      "9.0"
+    ]
+  }
+}
diff --git a/build/torch29-cxx11-cu129-aarch64-linux/quantization_eetq/__init__.py b/build/torch29-cxx11-cu129-aarch64-linux/quantization_eetq/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23
--- /dev/null
+++ b/build/torch29-cxx11-cu129-aarch64-linux/quantization_eetq/__init__.py
@@ -0,0 +1,26 @@
+import ctypes
+import importlib.util
+import sys
+from pathlib import Path
+from types import ModuleType
+
+
+def _import_from_path(file_path: Path) -> ModuleType:
+    """Execute the file at ``file_path`` as a module and return it.
+
+    It is registered in ``sys.modules`` under the hex-encoded hash of its
+    absolute path: a regular name there would be picked up by other imports.
+    """
+    unique_name = f"{ctypes.c_size_t(hash(file_path.absolute())).value:x}"
+    spec = importlib.util.spec_from_file_location(unique_name, file_path)
+    if spec is None:
+        raise ImportError(f"Cannot load spec for {unique_name} from {file_path}")
+    loaded = importlib.util.module_from_spec(spec)
+    if loaded is None:
+        raise ImportError(f"Cannot load module {unique_name} from spec")
+    sys.modules[unique_name] = loaded
+    spec.loader.exec_module(loaded)  # type: ignore
+    return loaded
+
+
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
diff --git a/build/torch29-cxx11-cu129-x86_64-linux/__init__.py b/build/torch29-cxx11-cu129-x86_64-linux/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c65d0601c655d7acf1a12e61b6549618b46a70d7
--- /dev/null
+++ b/build/torch29-cxx11-cu129-x86_64-linux/__init__.py
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
diff --git a/build/torch29-cxx11-cu129-x86_64-linux/_ops.py b/build/torch29-cxx11-cu129-x86_64-linux/_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..60ecc447c9f7708830d1cc06986da9d62a31bd96
--- /dev/null
+++ b/build/torch29-cxx11-cu129-x86_64-linux/_ops.py
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_cuda_4bc0051
+ops = torch.ops._quantization_eetq_cuda_4bc0051
+
+def add_op_namespace_prefix(op_name: str) -> str:
+    """
+    Return ``op_name`` prefixed with this build's op namespace and ``::``.
+    """
+    return "{}::{}".format("_quantization_eetq_cuda_4bc0051", op_name)
diff --git a/build/torch29-cxx11-cu129-x86_64-linux/_quantization_eetq_cuda_4bc0051.abi3.so b/build/torch29-cxx11-cu129-x86_64-linux/_quantization_eetq_cuda_4bc0051.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..875b04fdf7ea20d4518e619dce9536830964d858
--- /dev/null
+++ b/build/torch29-cxx11-cu129-x86_64-linux/_quantization_eetq_cuda_4bc0051.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad9bbfd0d2b97d8e622be4c589b4603e625e2b5a93c35f199358d6dfa7b9ffdf
+size 46493136
diff --git a/build/torch29-cxx11-cu129-x86_64-linux/custom_ops.py b/build/torch29-cxx11-cu129-x86_64-linux/custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..005b5a6e3cd5f7bcfd4aa5d7d80d60a5ed9fab88
--- /dev/null
+++ b/build/torch29-cxx11-cu129-x86_64-linux/custom_ops.py
@@ -0,0 +1,61 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    scale: torch.Tensor,
+) -> torch.Tensor:
+    """Forward the arguments to the extension's ``w8_a16_gemm`` op.
+
+    NOTE(review): the name suggests a GEMM with 8-bit weights and 16-bit
+    activations; the expected shapes and dtypes are set by the compiled
+    kernel, not by this wrapper -- confirm there.
+    """
+    result = ops.w8_a16_gemm(input, weight, scale)
+    return result
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    scale: torch.Tensor,
+    output: torch.Tensor,
+    m: int,
+    n: int,
+    k: int,
+) -> torch.Tensor:
+    """Forward the arguments to the extension's ``w8_a16_gemm_`` op.
+
+    NOTE(review): the trailing underscore and the ``output`` argument suggest
+    the result is written into ``output``, with ``m``/``n``/``k`` as the GEMM
+    dimensions -- not visible from this wrapper, confirm against the kernel.
+    """
+    gemm_args = (input, weight, scale, output, m, n, k)
+    return ops.w8_a16_gemm_(*gemm_args)
+
+
+def preprocess_weights(
+    origin_weight: torch.Tensor,
+    is_int4: bool,
+) -> torch.Tensor:
+    """Forward the arguments to the extension's ``preprocess_weights`` op."""
+    processed = ops.preprocess_weights(origin_weight, is_int4)
+    return processed
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    """Forward the arguments to the extension's ``quant_weights`` op.
+
+    Returns whatever the op returns; the annotation declares a list of
+    tensors.
+    """
+    op_args = (origin_weight, quant_type, return_unprocessed_quantized_tensor)
+    return ops.quant_weights(*op_args)
diff --git a/build/torch29-cxx11-cu129-x86_64-linux/metadata.json b/build/torch29-cxx11-cu129-x86_64-linux/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..8b796af185fbbd8594fcd846949aa5fadc0ccdda
--- /dev/null
+++ b/build/torch29-cxx11-cu129-x86_64-linux/metadata.json
@@ -0,0 +1,21 @@
+{
+  "version": 1,
+  "license": "Apache-2.0",
+  "python-depends": [],
+  "backend": {
+    "type": "cuda",
+    "archs": [
+      "10.0",
+      "10.1",
+      "12.0+PTX",
+      "7.0",
+      "7.2",
+      "7.5",
+      "8.0",
+      "8.6",
+      "8.7",
+      "8.9",
+      "9.0"
+    ]
+  }
+}
diff --git a/build/torch29-cxx11-cu129-x86_64-linux/quantization_eetq/__init__.py b/build/torch29-cxx11-cu129-x86_64-linux/quantization_eetq/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23
--- /dev/null
+++ b/build/torch29-cxx11-cu129-x86_64-linux/quantization_eetq/__init__.py
@@ -0,0 +1,26 @@
+import ctypes
+import importlib.util
+import sys
+from pathlib import Path
+from types import ModuleType
+
+
+def _import_from_path(file_path: Path) -> ModuleType:
+    """Execute the file at ``file_path`` as a module and return it.
+
+    It is registered in ``sys.modules`` under the hex-encoded hash of its
+    absolute path: a regular name there would be picked up by other imports.
+    """
+    unique_name = f"{ctypes.c_size_t(hash(file_path.absolute())).value:x}"
+    spec = importlib.util.spec_from_file_location(unique_name, file_path)
+    if spec is None:
+        raise ImportError(f"Cannot load spec for {unique_name} from {file_path}")
+    loaded = importlib.util.module_from_spec(spec)
+    if loaded is None:
+        raise ImportError(f"Cannot load module {unique_name} from spec")
+    sys.modules[unique_name] = loaded
+    spec.loader.exec_module(loaded)  # type: ignore
+    return loaded
+
+
+globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))