| import os |
| import torch |
| import platform |
| import requests |
| from pathlib import Path |
| from setuptools import setup, find_packages |
| from torch.utils.cpp_extension import CUDAExtension |
|
|
|
|
def get_latest_kernels_version(repo):
    """
    Get the latest version of the kernels from the github repo.

    Args:
        repo: GitHub repository in ``owner/name`` form.

    Returns:
        The tag name of the latest release with one leading "v" removed
        (for example ``"v0.0.6"`` becomes ``"0.0.6"``).

    Raises:
        requests.HTTPError: If the GitHub API answers with an error status.
        KeyError: If the response carries no ``tag_name`` field.
    """
    # A timeout keeps the build from hanging forever on a stalled connection.
    response = requests.get(
        f"https://api.github.com/repos/{repo}/releases/latest",
        timeout=30,
    )
    # Report the HTTP failure itself rather than a KeyError on the lookup below.
    response.raise_for_status()
    tag_name = response.json()["tag_name"]
    # Drop only the leading "v"; str.replace would also remove any "v" that
    # appears later in the tag (e.g. in a ".dev1" suffix).
    if tag_name.startswith("v"):
        return tag_name[1:]
    return tag_name
|
|
|
|
def get_kernels_whl_url(
    gpu_system_version,
    release_version,
    python_version,
    platform,
    architecture,
):
    """
    Build the download URL of a prebuilt kernels wheel on GitHub releases.

    Args:
        gpu_system_version: Local version label, e.g. "cu118" or "rocm561".
        release_version: Kernels release number without the leading "v".
        python_version: Compact interpreter version, e.g. "310".
        platform: Platform part of the wheel tag, e.g. "linux".
        architecture: Machine part of the wheel tag, e.g. "x86_64".

    Returns:
        The URL of the matching ``.whl`` asset as a string.
    """
    release_root = (
        "https://github.com/casper-hansen/AutoAWQ_kernels/releases/download"
    )
    # The same "cpXY" value is used for both the python tag and the ABI tag.
    interpreter_tag = f"cp{python_version}"
    wheel_file = (
        f"autoawq_kernels-{release_version}+{gpu_system_version}"
        f"-{interpreter_tag}-{interpreter_tag}"
        f"-{platform}_{architecture}.whl"
    )
    return f"{release_root}/v{release_version}/{wheel_file}"
|
|
|
|
# Base release number; a local "+cu..." / "+rocm..." suffix is appended further
# down unless PYPI_BUILD is enabled.
AUTOAWQ_VERSION = "0.2.4"
PYPI_BUILD = os.getenv("PYPI_BUILD", "0") == "1"

# An explicit CUDA_VERSION environment variable takes precedence over the
# version reported by torch. Dots are removed and at most three characters
# are kept, so "11.8" becomes "118".
CUDA_VERSION = os.getenv("CUDA_VERSION", None) or torch.version.cuda
if CUDA_VERSION:
    CUDA_VERSION = CUDA_VERSION.replace(".", "")[:3]

# Same precedence for ROCm: environment variable first, then torch.
ROCM_VERSION = os.getenv("ROCM_VERSION", None) or torch.version.hip
if ROCM_VERSION:
    # Every 5.6.x / 5.7.x version is pinned to the ".1" patch release before
    # being compacted ("5.6.1" -> "561", "5.7.1" -> "571").
    if ROCM_VERSION[:3] in ("5.6", "5.7"):
        ROCM_VERSION = ROCM_VERSION[:3] + ".1"
    ROCM_VERSION = ROCM_VERSION.replace(".", "")[:3]

# Non-PyPI builds tag the version with the detected GPU toolchain; CUDA is
# preferred when both are present, and having neither is an error.
if not PYPI_BUILD:
    if not (CUDA_VERSION or ROCM_VERSION):
        raise RuntimeError(
            "Your system must have either Nvidia or AMD GPU to build this package."
        )
    AUTOAWQ_VERSION += (
        f"+cu{CUDA_VERSION}" if CUDA_VERSION else f"+rocm{ROCM_VERSION}"
    )
|
|
# Package metadata passed to setup() as keyword arguments. The "version"
# entry carries AUTOAWQ_VERSION, including any GPU suffix added to it.
common_setup_kwargs = dict(
    version=AUTOAWQ_VERSION,
    name="autoawq",
    author="Casper Hansen",
    license="MIT",
    python_requires=">=3.8.0",
    description="AutoAWQ implements the AWQ algorithm for 4-bit quantization with a 2x speedup during inference.",
    # The README.md located next to this file is used as the long description.
    long_description=Path(__file__).parent.joinpath("README.md").read_text(
        encoding="UTF-8"
    ),
    long_description_content_type="text/markdown",
    url="https://github.com/casper-hansen/AutoAWQ",
    keywords=["awq", "autoawq", "quantization", "transformers"],
    platforms=["linux", "windows"],
    classifiers=[
        "Environment :: GPU :: NVIDIA CUDA :: 11.8",
        "Environment :: GPU :: NVIDIA CUDA :: 12",
        "License :: OSI Approved :: MIT License",
        "Natural Language :: English",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "Programming Language :: C++",
    ],
)
|
|
# Runtime dependencies passed to setup() as install_requires.
# Entries that declare a minimum version come first ...
requirements = [
    "torch>=2.0.1",
    "transformers>=4.35.0",
    "tokenizers>=0.12.1",
    "typing_extensions>=4.8.0",
]
# ... followed by the ones accepted at any version.
requirements += ["accelerate", "datasets", "zstandard"]
|
|
# Probe for an already-installed kernels extension: awq_ext is tried unless a
# ROCm version was detected, in which case exlv2_ext is tried instead.
# KERNELS_INSTALLED records whether that import succeeded.
try:
    if not ROCM_VERSION:
        import awq_ext  # noqa: F401
    else:
        import exlv2_ext  # noqa: F401
except ImportError:
    KERNELS_INSTALLED = False
else:
    KERNELS_INSTALLED = True
|
|
| |
| |
# Add the kernels package to the requirements when it is not importable yet
# and a CUDA or ROCm version was detected.
if not KERNELS_INSTALLED and (CUDA_VERSION or ROCM_VERSION):
    if CUDA_VERSION and CUDA_VERSION.startswith("12"):
        # CUDA 12.x: depend on the package by name.
        requirements.append("autoawq-kernels")
    elif (CUDA_VERSION and CUDA_VERSION.startswith("11")) or ROCM_VERSION in [
        "561",
        "571",
    ]:
        # CUDA 11.x and ROCm 5.6.1 / 5.7.1: depend on a specific wheel from
        # the latest GitHub release, addressed by direct URL.
        gpu_system_version = (
            f"cu{CUDA_VERSION}" if CUDA_VERSION else f"rocm{ROCM_VERSION}"
        )
        kernels_version = get_latest_kernels_version("casper-hansen/AutoAWQ_kernels")
        python_version = "".join(platform.python_version_tuple()[:2])
        platform_name = platform.system().lower()
        if platform_name == "windows":
            # Wheel platform tags spell Windows as "win" (e.g. "win_amd64"),
            # so "windows_amd64" would never match a published wheel file.
            platform_name = "win"
        architecture = platform.machine().lower()
        # Despite its name, this URL is used for the CUDA 11.x case as well.
        latest_rocm_kernels_wheels = get_kernels_whl_url(
            gpu_system_version,
            kernels_version,
            python_version,
            platform_name,
            architecture,
        )
        requirements.append(f"autoawq-kernels@{latest_rocm_kernels_wheels}")
    else:
        raise RuntimeError(
            "Your system have a GPU with an unsupported CUDA or ROCm version. "
            "Please install the kernels manually from https://github.com/casper-hansen/AutoAWQ_kernels"
        )
|
|
# When PYPI_FORCE_TAGS=1, register a CUDAExtension that has no sources.
# NOTE(review): presumably this only exists so the built distribution is
# treated as containing an extension module (platform-specific tags) --
# confirm against the release workflow.
force_extension = os.getenv("PYPI_FORCE_TAGS", "0")
if force_extension == "1":
    common_setup_kwargs.update(
        ext_modules=[CUDAExtension(name="test_kernel", sources=[])]
    )
|
|
# Hand everything to setuptools: the shared metadata, the discovered packages,
# the computed requirements and the optional "eval" / "dev" extras.
setup(
    **common_setup_kwargs,
    packages=find_packages(),
    install_requires=requirements,
    extras_require=dict(
        eval=["lm_eval==0.4.1", "tabulate", "protobuf", "evaluate", "scipy"],
        dev=["black", "mkdocstrings-python", "mkdocs-material", "griffe-typingdoc"],
    ),
)
|
|