zuminghuang committed on
Commit
e35b4c7
·
verified ·
1 Parent(s): b641725

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +4 -2
README.md CHANGED
@@ -47,10 +47,12 @@ conda activate infinity_parser2
47
  # Install PyTorch (CUDA). Find the proper version at https://pytorch.org/get-started/previous-versions based on your CUDA version.
48
  pip install torch==2.10.0 torchvision==0.25.0 torchaudio==2.10.0 --index-url https://download.pytorch.org/whl/cu128
49
 
50
- # Install FlashAttention (required for NVIDIA GPUs).
51
  # This command builds flash-attn from source, which can take 10 to 30 minutes.
 
 
52
  pip install flash-attn==2.8.3 --no-build-isolation
53
- # For Hopper GPUs (e.g. H100, H800), we recommend FlashAttention-3 instead. See the official guide at https://github.com/Dao-AILab/flash-attention.
54
 
55
  # Install vLLM
56
  # NOTE: you may need to run the command below to resolve triton and numpy conflicts before installing vllm.
 
47
  # Install PyTorch (CUDA). Find the proper version at https://pytorch.org/get-started/previous-versions based on your CUDA version.
48
  pip install torch==2.10.0 torchvision==0.25.0 torchaudio==2.10.0 --index-url https://download.pytorch.org/whl/cu128
49
 
50
+ # Install FlashAttention (recommended for NVIDIA GPUs).
51
  # This command builds flash-attn from source, which can take 10 to 30 minutes.
52
+ # To speed up installation, you can instead download the appropriate prebuilt wheel from the official releases (https://github.com/Dao-AILab/flash-attention/releases) and install it with:
53
+ # pip install /path/to/<wheel_filename>.whl
54
  pip install flash-attn==2.8.3 --no-build-isolation
55
+ # NOTE: For Hopper GPUs (e.g. H100, H800), we recommend FlashAttention-3 instead. See: https://github.com/Dao-AILab/flash-attention.
56
 
57
  # Install vLLM
58
  # NOTE: you may need to run the command below to resolve triton and numpy conflicts before installing vllm.