winglian committed on
Commit e3ba05b
1 Parent(s): 4cc03d2

rm docker implementation, add llama-cpp-python builder github actions, update copy to identify model in ui

.github/workflows/build-llama-cpp-wheel.yml ADDED
@@ -0,0 +1,58 @@
+name: Build wheel in Docker
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - 'Dockerfile-llama-cpp-wheel'
+  release:
+    types: [published]
+
+jobs:
+  build:
+    runs-on: self-hosted
+    permissions:
+      contents: write
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v2
+
+      - name: Build Docker image
+        run: docker build . -t artifact-builder -f Dockerfile-llama-cpp-wheel
+
+      - name: Run Docker container
+        run: docker run --name my-artifact-builder artifact-builder
+
+      - name: Copy GPU & CPU artifact from Docker container
+        run: |
+          docker cp my-artifact-builder:/build/dists/*.whl .
+
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v3
+        with:
+          name: wheels
+          path: |
+            *.whl
+
+  release:
+    needs: build
+    runs-on: self-hosted
+    if: github.event_name == 'release'
+    permissions:
+      contents: write
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v2
+
+      - name: Download artifacts
+        uses: actions/download-artifact@v3
+        with:
+          name: wheels
+
+      - name: Release
+        uses: softprops/action-gh-release@v1
+        with:
+          files: |
+            *.whl
+          token: ${{ secrets.GITHUB_TOKEN }}
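
One caveat in the workflow above: `docker cp` does not expand shell wildcards, so the `/build/dists/*.whl` copy step may fail depending on the Docker version. A minimal glob-safe sketch of the same step, written in Python rather than the workflow's shell (the container name comes from the workflow; running this outside CI is hypothetical):

# Glob-safe alternative to the `docker cp .../*.whl .` step above:
# `docker cp` does not expand wildcards, so copy the whole dists/
# directory out of the container, then flatten the wheels locally.
# Assumes the "my-artifact-builder" container from the workflow has run.
import shutil
import subprocess
from pathlib import Path

subprocess.run(
    ["docker", "cp", "my-artifact-builder:/build/dists/", "."],
    check=True,  # raise CalledProcessError if the container or path is missing
)
for wheel in Path("dists").glob("*.whl"):
    shutil.move(str(wheel), wheel.name)  # move each wheel into the workspace root
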
Dockerfile DELETED
@@ -1,68 +0,0 @@
-FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as builder-llamacpp
-
-RUN apt-get update && \
-    apt-get install --no-install-recommends -y git vim build-essential python3 python3-pip python3-dev python3-venv libblas-dev liblapack-dev libopenblas-dev cmake && \
-    rm -rf /var/lib/apt/lists/* && \
-    pip3 install scikit-build
-
-RUN git clone --depth 1 --branch v0.1.49 https://github.com/abetlen/llama-cpp-python.git /build
-RUN git clone https://github.com/ggerganov/llama.cpp.git /build/vendor/llama.cpp
-
-WORKDIR /build
-
-RUN CMAKE_ARGS="-DLLAMA_OPENBLAS=on" FORCE_CMAKE=1 python3 setup.py bdist_wheel
-# dist/llama_cpp_python-0.1.49-cp310-cp310-linux_x86_64.whl
-
-FROM nvidia/cuda:11.8.0-runtime-ubuntu22.04
-
-LABEL maintainer="Wing Lian <wing.lian@gmail.com>"
-
-RUN apt-get update && \
-    apt-get install --no-install-recommends -y \
-    build-essential curl libportaudio2 libasound-dev git python3 python3-pip make g++ \
-    libffi-dev libncurses5-dev zlib1g zlib1g-dev libreadline-dev libbz2-dev libsqlite3-dev libssl-dev \
-    libblas-dev liblapack-dev libopenblas-dev cmake git-lfs && \
-    git lfs install && \
-    rm -rf /var/lib/apt/lists/*
-
-RUN groupadd -g 1000 appuser && \
-    useradd -r -u 1000 -g appuser appuser -m -d /home/appuser
-
-RUN --mount=type=cache,target=/root/.cache/pip pip3 install virtualenv
-RUN mkdir /app
-RUN mkdir -p /opt/venv
-RUN chown -R appuser:appuser /app
-
-WORKDIR /app
-
-RUN virtualenv /opt/venv
-RUN . /opt/venv/bin/activate && \
-    pip3 install --no-cache-dir --upgrade pip setuptools wheel && \
-    pip3 install --no-cache-dir datasets "huggingface-hub>=0.12.1" "protobuf<4" "click<8.1" "bitsandbytes" "gradio" && \
-    pip3 install --no-cache-dir torch torchvision torchaudio
-
-COPY --from=builder-llamacpp /build/dist/llama_cpp_python-0.1.49-cp310-cp310-linux_x86_64.whl /app
-RUN . /opt/venv/bin/activate && \
-    pip3 uninstall llama_cpp_python && \
-    pip3 install /app/llama_cpp_python-0.1.49-cp310-cp310-linux_x86_64.whl && \
-    rm /app/llama_cpp_python-0.1.49-cp310-cp310-linux_x86_64.whl
-
-COPY requirements.txt /app/requirements.txt
-RUN . /opt/venv/bin/activate && \
-    pip3 install --no-cache-dir -r requirements.txt
-
-RUN cp /opt/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda118.so /opt/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so
-
-COPY . /app/
-
-RUN mkdir -p /opt/cache/huggingface/hub
-RUN chown -R appuser:appuser /app && find /app -type d -exec chmod 0755 {} \;
-RUN chown -R appuser:appuser /home/appuser
-RUN chmod +x /app/entrypoint.sh && \
-    chmod +x /app/app.py
-
-ENV TRANSFORMERS_CACHE=/opt/cache/huggingface/hub
-
-USER appuser
-
-ENTRYPOINT ["/app/entrypoint.sh"]
Dockerfile-llama-cpp-wheel ADDED
@@ -0,0 +1,50 @@
+FROM nvidia/cuda:11.3.1-cudnn8-devel-ubuntu18.04
+
+ARG LLAMA_CPP_VERSION="0.1.50"
+ARG CMAKE_VERSION=3.26
+ARG CMAKE_VERSION_PATCH=3.26.3
+ARG CMAKE_OS=linux
+ARG DEBIAN_FRONTEND=noninteractive
+ENV TZ=UTC
+
+RUN apt-get update && \
+    apt-get install --no-install-recommends -y \
+    curl git vim build-essential software-properties-common python3 python3-pip python3-dev python3-venv \
+    libffi-dev libncurses5-dev zlib1g zlib1g-dev libreadline-dev libbz2-dev libsqlite3-dev libssl-dev \
+    libblas-dev liblapack-dev libopenblas-dev cmake && \
+    add-apt-repository ppa:ubuntu-toolchain-r/test && \
+    apt-get update && \
+    apt install --no-install-recommends -y gcc-10 g++-10 && \
+    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 100 --slave /usr/bin/g++ g++ /usr/bin/g++-10 --slave /usr/bin/gcov gcov /usr/bin/gcov-10 && \
+    rm -rf /var/lib/apt/lists/* && \
+    pip3 install scikit-build
+RUN curl -L https://cmake.org/files/v$CMAKE_VERSION/cmake-$CMAKE_VERSION_PATCH-$CMAKE_OS-x86_64.sh -o /tmp/cmake-$CMAKE_VERSION_PATCH-$CMAKE_OS-x86_64.sh && \
+    mkdir /opt/cmake && \
+    sh /tmp/cmake-$CMAKE_VERSION_PATCH-$CMAKE_OS-x86_64.sh --skip-license --prefix=/opt/cmake && \
+    ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake
+
+RUN useradd -m -u 1000 appuser
+
+WORKDIR /build
+RUN chown appuser:appuser /build
+USER appuser
+
+ENV HOME /home/appuser
+ENV PYENV_ROOT $HOME/.pyenv
+ENV PATH $PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH
+
+RUN git clone --depth 1 --branch v$LLAMA_CPP_VERSION https://github.com/abetlen/llama-cpp-python.git /build
+RUN git clone https://github.com/ggerganov/llama.cpp.git /build/vendor/llama.cpp
+RUN curl https://pyenv.run | bash
+
+RUN pyenv install 3.8.9 && \
+    pyenv global 3.8.9 && \
+    pyenv rehash && \
+    pip install --no-cache-dir --upgrade pip==22.3.1 setuptools wheel && \
+    pip install --no-cache-dir datasets "huggingface-hub>=0.12.1" "protobuf<4" "click<8.1" "scikit-build" && \
+    CMAKE_ARGS="-DLLAMA_CUBLAS=on -DLLAMA_OPENBLAS=off" FORCE_CMAKE=1 python3 setup.py bdist_wheel && \
+    mkdir /build/dists/ && \
+    cp dist/llama_cpp_python-${LLAMA_CPP_VERSION}-cp38-cp38-linux_x86_64.whl dists/llama_cpp_python-gpu-${LLAMA_CPP_VERSION}-cp38-cp38-linux_x86_64.whl && \
+    CMAKE_ARGS="-DLLAMA_CUBLAS=off -DLLAMA_OPENBLAS=off" FORCE_CMAKE=1 python3 setup.py bdist_wheel && \
+    cp dist/llama_cpp_python-${LLAMA_CPP_VERSION}-cp38-cp38-linux_x86_64.whl dists/llama_cpp_python-cpu-${LLAMA_CPP_VERSION}-cp38-cp38-linux_x86_64.whl && \
+    ls -l /build/dists/
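
The final RUN builds the same llama-cpp-python version twice, once with cuBLAS and once CPU-only, and renames the wheels with a -gpu-/-cpu- infix so both fit in /build/dists/. Note the infixed filenames are not valid wheel names, so a consumer would likely need to rename the chosen wheel back to its canonical name before installing. A minimal usage sketch once the wheel is installed (the model path and prompt are placeholders, not from this repo):

# After renaming the chosen wheel back and installing it, e.g.:
#   mv llama_cpp_python-gpu-0.1.50-cp38-cp38-linux_x86_64.whl \
#      llama_cpp_python-0.1.50-cp38-cp38-linux_x86_64.whl
#   pip install llama_cpp_python-0.1.50-cp38-cp38-linux_x86_64.whl
from llama_cpp import Llama

# model_path is a placeholder; any GGML-format model file works here
llm = Llama(model_path="./models/example.ggml.q4_0.bin")
output = llm(
    "Q: What does llama.cpp do? A:",
    max_tokens=64,  # cap the completion length
    stop=["Q:"],    # stop before the model invents another question
)
print(output["choices"][0]["text"])
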
chat.py CHANGED
@@ -90,10 +90,12 @@ with blocks:
     )
     stop.click(fn=None, inputs=None, outputs=None, cancels=[submit_click_event, message_submit_event], queue=False)
 
-    gr.Markdown("""
+    gr.Markdown(f"""
+    - This is the {config["repo"]}/{config["file"]} model.
+    - This Space uses GGML with GPU support, so it can run larger models on smaller GPUs & VRAM quickly.
     - This is running on a smaller, shared GPU, so it may take a few seconds to respond.
     - [Duplicate the Space](https://huggingface.co/spaces/openaccess-ai-collective/ggml-ui?duplicate=true) to skip the queue and run in a private space or to use your own GGML models.
-    - When using your own models, simply update the [./config.yml](./config.yml)")
+    - When using your own models, simply update the [config.yml](https://huggingface.co/spaces/openaccess-ai-collective/ggml-ui/blob/main/config.yml)
     - Contribute at [https://github.com/OpenAccess-AI-Collective/ggml-webui](https://github.com/OpenAccess-AI-Collective/ggml-webui)
     """)
 
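
The f-string added above assumes a `config` mapping with `repo` and `file` keys identifying the hosted model. A hypothetical sketch of the plumbing that would populate those keys (the config.yml schema and the download step are assumptions, not shown in this diff):

# Hypothetical sketch of how config["repo"] / config["file"] could be
# loaded and used; the actual chat.py may differ.
import yaml
from huggingface_hub import hf_hub_download

with open("config.yml") as f:
    config = yaml.safe_load(f)  # assumed shape: {"repo": "<hub repo id>", "file": "<ggml filename>", ...}

# fetch the GGML weights named in the config from the Hugging Face Hub
model_path = hf_hub_download(repo_id=config["repo"], filename=config["file"])
print(f"Serving {config['repo']}/{config['file']} from {model_path}")
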
entrypoint.sh DELETED
@@ -1,6 +0,0 @@
-#!/bin/bash
-
-source /opt/venv/bin/activate
-export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
-
-python3 chat.py