Update README.md
Browse files
README.md
CHANGED
|
@@ -47,10 +47,12 @@ conda activate infinity_parser2
|
|
| 47 |
# Install PyTorch (CUDA). Find the proper version at https://pytorch.org/get-started/previous-versions based on your CUDA version.
|
| 48 |
pip install torch==2.10.0 torchvision==0.25.0 torchaudio==2.10.0 --index-url https://download.pytorch.org/whl/cu128
|
| 49 |
|
| 50 |
-
# Install FlashAttention (
|
| 51 |
# This command builds flash-attn from source, which can take 10 to 30 minutes.
|
|
|
|
|
|
|
| 52 |
pip install flash-attn==2.8.3 --no-build-isolation
|
| 53 |
-
# For Hopper GPUs (e.g. H100, H800), we recommend FlashAttention-3 instead. See
|
| 54 |
|
| 55 |
# Install vLLM
|
| 56 |
# NOTE: you may need to run the command below to resolve triton and numpy conflicts before installing vllm.
|
|
|
|
| 47 |
# Install PyTorch (CUDA). Find the proper version at https://pytorch.org/get-started/previous-versions based on your CUDA version.
|
| 48 |
pip install torch==2.10.0 torchvision==0.25.0 torchaudio==2.10.0 --index-url https://download.pytorch.org/whl/cu128
|
| 49 |
|
| 50 |
+
# Install FlashAttention (recommended for NVIDIA GPUs).
|
| 51 |
# The `pip install flash-attn` command below builds flash-attn from source, which can take 10 to 30 minutes.
|
| 52 |
+
# To speed up installation, download the prebuilt wheel that matches your Python, PyTorch, and CUDA versions from the official releases (https://github.com/Dao-AILab/flash-attention/releases), then run this instead:
|
| 53 |
+
# pip install /path/to/<wheel_filename>.whl
|
| 54 |
pip install flash-attn==2.8.3 --no-build-isolation
|
| 55 |
+
# NOTE: For Hopper GPUs (e.g. H100, H800), we recommend FlashAttention-3 instead. See https://github.com/Dao-AILab/flash-attention
|
| 56 |
|
| 57 |
# Install vLLM
|
| 58 |
# NOTE: you may need to run the command below to resolve triton and numpy conflicts before installing vllm.
|