mhenrichsen, Ubuntu, Mads Henrichsen, and winglian committed
Commit cf66547
Parent: 06edf17

flash attn pip install (#426)

* flash attn pip

* add packaging

* add packaging to apt get

* install flash attn in dockerfile

* remove unused whls

* add wheel

* clean up pr

fix packaging requirement for ci
upgrade pip for ci
skip build isolation for requirements to get flash-attn working
install flash-attn separately

* install wheel for ci

* no flash-attn for basic cicd

* install flash-attn as pip extras

---------

Co-authored-by: Ubuntu <mgh@mgh-vm.wsyvwcia0jxedeyrchqg425tpb.ax.internal.cloudapp.net>
Co-authored-by: mhenrichsen <some_email@hey.com>
Co-authored-by: Mads Henrichsen <mads@BrbartiendeMads.lan>
Co-authored-by: Wing Lian <wing.lian@gmail.com>
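
The bullets above ("add packaging", "upgrade pip for ci", "add wheel", "skip build isolation") reflect a quirk of installing flash-attn from PyPI: its source build expects `torch` and `packaging` to be importable at setup time, which pip's default build isolation hides. A minimal sketch of the install order those bullets describe, assuming torch is already present:

```bash
# flash-attn's source build imports torch and packaging at setup time,
# so install the build prerequisites first ...
pip3 install -U pip packaging wheel
# ... then skip build isolation so the build can see the installed torch
pip3 install --no-build-isolation flash-attn==2.0.8
```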

.github/workflows/main.yml CHANGED
@@ -13,17 +13,17 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - cuda: cu118
+          - cuda: 118
             cuda_version: 11.8.0
             python_version: "3.9"
             pytorch: 2.0.1
             axolotl_extras:
-          - cuda: cu118
+          - cuda: 118
             cuda_version: 11.8.0
             python_version: "3.10"
             pytorch: 2.0.1
             axolotl_extras:
-          - cuda: cu118
+          - cuda: 118
             cuda_version: 11.8.0
             python_version: "3.9"
             pytorch: 2.0.1
@@ -49,10 +49,11 @@ jobs:
         with:
           context: .
           build-args: |
-            BASE_TAG=${{ github.ref_name }}-base-py${{ matrix.python_version }}-${{ matrix.cuda }}-${{ matrix.pytorch }}
+            BASE_TAG=${{ github.ref_name }}-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}
+            CUDA=${{ matrix.cuda }}
           file: ./docker/Dockerfile
           push: ${{ github.event_name != 'pull_request' }}
-          tags: ${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
+          tags: ${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
           labels: ${{ steps.metadata.outputs.labels }}
   build-axolotl-runpod:
     needs: build-axolotl
README.md CHANGED
@@ -69,7 +69,7 @@ Get started with Axolotl in just a few steps! This quickstart guide will walk yo
 ```bash
 git clone https://github.com/OpenAccess-AI-Collective/axolotl
 
-pip3 install -e .
+pip3 install -e .[flash-attn]
 pip3 install -U git+https://github.com/huggingface/peft.git
 
 # finetune lora
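
One caveat on the new quickstart line: in zsh and some other shells, square brackets are glob characters, so the extras spec needs quoting. A portable variant:

```bash
# quote the extras spec so the shell doesn't try to glob-expand the brackets
pip3 install -e '.[flash-attn]'
```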
docker/Dockerfile CHANGED
@@ -16,9 +16,9 @@ RUN git clone --depth=1 https://github.com/OpenAccess-AI-Collective/axolotl.git
 # If AXOLOTL_EXTRAS is set, append it in brackets
 RUN cd axolotl && \
     if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
-        pip install -e .[$AXOLOTL_EXTRAS]; \
+        pip install -e .[flash-attn,$AXOLOTL_EXTRAS]; \
     else \
-        pip install -e .; \
+        pip install -e .[flash-attn]; \
     fi
 
 # fix so that git fetch/pull from remote works
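
For illustration, the two branches of that conditional resolve to the following pip invocations, using `extras` (the deepspeed group defined in setup.py below) as an example value for AXOLOTL_EXTRAS:

```bash
# AXOLOTL_EXTRAS="extras": flash-attn plus the requested extras group
pip install -e .[flash-attn,extras]
# AXOLOTL_EXTRAS unset or empty: flash-attn only
pip install -e .[flash-attn]
```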
docker/Dockerfile-base CHANGED
@@ -31,26 +31,6 @@ WORKDIR /workspace
 RUN python3 -m pip install --upgrade pip && pip3 install packaging && \
     python3 -m pip install --no-cache-dir -U torch==${PYTORCH_VERSION}+cu${CUDA} --extra-index-url https://download.pytorch.org/whl/cu$CUDA
 
-
-FROM base-builder AS flash-attn-builder
-
-WORKDIR /workspace
-
-ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 9.0+PTX"
-
-RUN git clone https://github.com/Dao-AILab/flash-attention.git && \
-    cd flash-attention && \
-    git checkout v2.0.4 && \
-    python3 setup.py bdist_wheel && \
-    cd csrc/fused_dense_lib && \
-    python3 setup.py bdist_wheel && \
-    cd ../xentropy && \
-    python3 setup.py bdist_wheel && \
-    cd ../rotary && \
-    python3 setup.py bdist_wheel && \
-    cd ../layer_norm && \
-    python3 setup.py bdist_wheel
-
 FROM base-builder AS deepspeed-builder
 
 ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 9.0+PTX"
@@ -90,13 +70,8 @@ RUN mkdir -p /workspace/wheels/bitsandbytes
 COPY --from=deepspeed-builder /workspace/DeepSpeed/dist/deepspeed-*.whl wheels
 COPY --from=bnb-builder /workspace/bitsandbytes/dist/bitsandbytes-*.whl wheels
 COPY --from=bnb-builder /workspace/bitsandbytes/bitsandbytes/libbitsandbytes*.so wheels/bitsandbytes
-COPY --from=flash-attn-builder /workspace/flash-attention/dist/flash_attn-*.whl wheels
-COPY --from=flash-attn-builder /workspace/flash-attention/csrc/fused_dense_lib/dist/fused_dense_lib-*.whl wheels
-COPY --from=flash-attn-builder /workspace/flash-attention/csrc/xentropy/dist/xentropy_cuda_lib-*.whl wheels
-COPY --from=flash-attn-builder /workspace/flash-attention/csrc/rotary/dist/rotary_emb-*.whl wheels
-COPY --from=flash-attn-builder /workspace/flash-attention/csrc/layer_norm/dist/dropout_layer_norm-*.whl wheels
 
-RUN pip3 install wheels/deepspeed-*.whl wheels/flash_attn-*.whl wheels/fused_dense_lib-*.whl wheels/xentropy_cuda_lib-*.whl wheels/rotary_emb-*.whl wheels/dropout_layer_norm-*.whl
+RUN pip3 install wheels/deepspeed-*.whl
 RUN cd /workspace/builds/bitsandbytes && python3 setup.py install
 RUN git lfs install --skip-repo
 RUN pip3 install awscli && \
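
The removed flash-attn-builder stage used to compile flash-attn v2.0.4 and its csrc companion wheels (fused_dense_lib, xentropy_cuda_lib, rotary_emb, dropout_layer_norm) from source; after this change only deepspeed's wheel is installed here, and flash-attn arrives via the pip extra in the runtime Dockerfile. A quick sanity check one could run in the finished image (hypothetical, not part of the Dockerfile):

```bash
# verify the pip-installed flash-attn imports against the image's torch/CUDA build
python3 -c "import torch, flash_attn; print(torch.version.cuda, flash_attn.__version__)"
```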
requirements.txt CHANGED
@@ -6,6 +6,7 @@ addict
 fire
 PyYAML==6.0
 datasets
+flash-attn==2.0.8
 sentencepiece
 wandb
 einops
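
Note the interplay with setup.py below: the pin lives in requirements.txt, so installing straight from the requirements file still pulls flash-attn, while a bare `pip install -e .` will not, because setup.py filters it into an extra. For example:

```bash
# pulls flash-attn==2.0.8 along with everything else in the file
pip3 install -r requirements.txt
```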
setup.py CHANGED
@@ -7,6 +7,7 @@ with open("./requirements.txt", encoding="utf-8") as requirements_file:
     # don't include peft yet until we check the int4
     # need to manually install peft for now...
     reqs = [r.strip() for r in requirements_file.readlines() if "peft" not in r]
+    reqs = [r for r in reqs if "flash-attn" not in r]
     reqs = [r for r in reqs if r and r[0] != "#"]
     for r in reqs:
         install_requires.append(r)
@@ -25,8 +26,10 @@ setup(
         "gptq_triton": [
             "alpaca_lora_4bit[triton] @ git+https://github.com/winglian/alpaca_lora_4bit.git@setup_pip",
         ],
+        "flash-attn": [
+            "flash-attn==2.0.8",
+        ],
         "extras": [
-            "flash-attn",
             "deepspeed",
         ],
     },
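
With the filter in place, `flash-attn` is kept out of `install_requires` (so a plain editable install stays buildable on machines without CUDA) and is re-exposed as its own extra, which can be combined with the existing ones. For example:

```bash
# flash-attn alone
pip3 install -e '.[flash-attn]'
# flash-attn together with the "extras" group (deepspeed)
pip3 install -e '.[flash-attn,extras]'
```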