winglian committed on
Commit e3ba05b
1 Parent(s): 4cc03d2

rm docker implementation, add llama-cpp-python builder github actions, update copy to identify model in ui

.github/workflows/build-llama-cpp-wheel.yml ADDED
@@ -0,0 +1,58 @@
+name: Build wheel in Docker
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - 'Dockerfile-llama-cpp-wheel'
+  release:
+    types: [published]
+
+jobs:
+  build:
+    runs-on: self-hosted
+    permissions:
+      contents: write
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v2
+
+      - name: Build Docker image
+        run: docker build . -t artifact-builder -f Dockerfile-llama-cpp-wheel
+
+      - name: Run Docker container
+        run: docker run --name my-artifact-builder artifact-builder
+
+      - name: Copy GPU & CPU artifact from Docker container
+        run: |
+          docker cp my-artifact-builder:/build/dists/*.whl .
+
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v3
+        with:
+          name: wheels
+          path: |
+            *.whl
+
+  release:
+    needs: build
+    runs-on: self-hosted
+    if: github.event_name == 'release'
+    permissions:
+      contents: write
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v2
+
+      - name: Download artifacts
+        uses: actions/download-artifact@v3
+        with:
+          name: wheels
+
+      - name: Release
+        uses: softprops/action-gh-release@v1
+        with:
+          files: |
+            *.whl
+          token: ${{ secrets.GITHUB_TOKEN }}
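
One caveat in the workflow above: `docker cp` does not expand shell wildcards, so the `/build/dists/*.whl` copy step may fail depending on the Docker version. A minimal glob-safe sketch of the same step, written in Python rather than the workflow's shell (the container name comes from the workflow; running this outside CI is hypothetical):

# Glob-safe alternative to the `docker cp .../*.whl .` step above:
# `docker cp` does not expand wildcards, so copy the whole dists/
# directory out of the container, then flatten the wheels locally.
# Assumes the "my-artifact-builder" container from the workflow has run.
import shutil
import subprocess
from pathlib import Path

subprocess.run(
    ["docker", "cp", "my-artifact-builder:/build/dists/", "."],
    check=True,  # raise CalledProcessError if the container or path is missing
)
for wheel in Path("dists").glob("*.whl"):
    shutil.move(str(wheel), wheel.name)  # move each wheel into the workspace root
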
Dockerfile DELETED
@@ -1,68 +0,0 @@
-FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as builder-llamacpp
-
-RUN apt-get update && \
-    apt-get install --no-install-recommends -y git vim build-essential python3 python3-pip python3-dev python3-venv libblas-dev liblapack-dev libopenblas-dev cmake && \
-    rm -rf /var/lib/apt/lists/* && \
-    pip3 install scikit-build
-
-RUN git clone --depth 1 --branch v0.1.49 https://github.com/abetlen/llama-cpp-python.git /build
-RUN git clone https://github.com/ggerganov/llama.cpp.git /build/vendor/llama.cpp
-
-WORKDIR /build
-
-RUN CMAKE_ARGS="-DLLAMA_OPENBLAS=on" FORCE_CMAKE=1 python3 setup.py bdist_wheel
-# dist/llama_cpp_python-0.1.49-cp310-cp310-linux_x86_64.whl
-
-FROM nvidia/cuda:11.8.0-runtime-ubuntu22.04
-
-LABEL maintainer="Wing Lian <wing.lian@gmail.com>"
-
-RUN apt-get update && \
-    apt-get install --no-install-recommends -y \
-    build-essential curl libportaudio2 libasound-dev git python3 python3-pip make g++ \
-    libffi-dev libncurses5-dev zlib1g zlib1g-dev libreadline-dev libbz2-dev libsqlite3-dev libssl-dev \
-    libblas-dev liblapack-dev libopenblas-dev cmake git-lfs && \
-    git lfs install && \
-    rm -rf /var/lib/apt/lists/*
-
-RUN groupadd -g 1000 appuser && \
-    useradd -r -u 1000 -g appuser appuser -m -d /home/appuser
-
-RUN --mount=type=cache,target=/root/.cache/pip pip3 install virtualenv
-RUN mkdir /app
-RUN mkdir -p /opt/venv
-RUN chown -R appuser:appuser /app
-
-WORKDIR /app
-
-RUN virtualenv /opt/venv
-RUN . /opt/venv/bin/activate && \
-    pip3 install --no-cache-dir --upgrade pip setuptools wheel && \
-    pip3 install --no-cache-dir datasets "huggingface-hub>=0.12.1" "protobuf<4" "click<8.1" "bitsandbytes" "gradio" && \
-    pip3 install --no-cache-dir torch torchvision torchaudio
-
-COPY --from=builder-llamacpp /build/dist/llama_cpp_python-0.1.49-cp310-cp310-linux_x86_64.whl /app
-RUN . /opt/venv/bin/activate && \
-    pip3 uninstall llama_cpp_python && \
-    pip3 install /app/llama_cpp_python-0.1.49-cp310-cp310-linux_x86_64.whl && \
-    rm /app/llama_cpp_python-0.1.49-cp310-cp310-linux_x86_64.whl
-
-COPY requirements.txt /app/requirements.txt
-RUN . /opt/venv/bin/activate && \
-    pip3 install --no-cache-dir -r requirements.txt
-
-RUN cp /opt/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda118.so /opt/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so
-
-COPY . /app/
-
-RUN mkdir -p /opt/cache/huggingface/hub
-RUN chown -R appuser:appuser /app && find /app -type d -exec chmod 0755 {} \;
-RUN chown -R appuser:appuser /home/appuser
-RUN chmod +x /app/entrypoint.sh && \
-    chmod +x /app/app.py
-
-ENV TRANSFORMERS_CACHE=/opt/cache/huggingface/hub
-
-USER appuser
-
-ENTRYPOINT ["/app/entrypoint.sh"]
Dockerfile-llama-cpp-wheel ADDED
@@ -0,0 +1,50 @@
+FROM nvidia/cuda:11.3.1-cudnn8-devel-ubuntu18.04
+
+ARG LLAMA_CPP_VERSION="0.1.50"
+ARG CMAKE_VERSION=3.26
+ARG CMAKE_VERSION_PATCH=3.26.3
+ARG CMAKE_OS=linux
+ARG DEBIAN_FRONTEND=noninteractive
+ENV TZ=UTC
+
+RUN apt-get update && \
+    apt-get install --no-install-recommends -y \
+    curl git vim build-essential software-properties-common python3 python3-pip python3-dev python3-venv \
+    libffi-dev libncurses5-dev zlib1g zlib1g-dev libreadline-dev libbz2-dev libsqlite3-dev libssl-dev \
+    libblas-dev liblapack-dev libopenblas-dev cmake && \
+    add-apt-repository ppa:ubuntu-toolchain-r/test && \
+    apt-get update && \
+    apt install --no-install-recommends -y gcc-10 g++-10 && \
+    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 100 --slave /usr/bin/g++ g++ /usr/bin/g++-10 --slave /usr/bin/gcov gcov /usr/bin/gcov-10 && \
+    rm -rf /var/lib/apt/lists/* && \
+    pip3 install scikit-build
+RUN curl -L https://cmake.org/files/v$CMAKE_VERSION/cmake-$CMAKE_VERSION_PATCH-$CMAKE_OS-x86_64.sh -o /tmp/cmake-$CMAKE_VERSION_PATCH-$CMAKE_OS-x86_64.sh && \
+    mkdir /opt/cmake && \
+    sh /tmp/cmake-$CMAKE_VERSION_PATCH-$CMAKE_OS-x86_64.sh --skip-license --prefix=/opt/cmake && \
+    ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake
+
+RUN useradd -m -u 1000 appuser
+
+WORKDIR /build
+RUN chown appuser:appuser /build
+USER appuser
+
+ENV HOME /home/appuser
+ENV PYENV_ROOT $HOME/.pyenv
+ENV PATH $PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH
+
+RUN git clone --depth 1 --branch v$LLAMA_CPP_VERSION https://github.com/abetlen/llama-cpp-python.git /build
+RUN git clone https://github.com/ggerganov/llama.cpp.git /build/vendor/llama.cpp
+RUN curl https://pyenv.run | bash
+
+RUN pyenv install 3.8.9 && \
+    pyenv global 3.8.9 && \
+    pyenv rehash && \
+    pip install --no-cache-dir --upgrade pip==22.3.1 setuptools wheel && \
+    pip install --no-cache-dir datasets "huggingface-hub>=0.12.1" "protobuf<4" "click<8.1" "scikit-build" && \
+    CMAKE_ARGS="-DLLAMA_CUBLAS=on -DLLAMA_OPENBLAS=off" FORCE_CMAKE=1 python3 setup.py bdist_wheel && \
+    mkdir /build/dists/ && \
+    cp dist/llama_cpp_python-${LLAMA_CPP_VERSION}-cp38-cp38-linux_x86_64.whl dists/llama_cpp_python-gpu-${LLAMA_CPP_VERSION}-cp38-cp38-linux_x86_64.whl && \
+    CMAKE_ARGS="-DLLAMA_CUBLAS=off -DLLAMA_OPENBLAS=off" FORCE_CMAKE=1 python3 setup.py bdist_wheel && \
+    cp dist/llama_cpp_python-${LLAMA_CPP_VERSION}-cp38-cp38-linux_x86_64.whl dists/llama_cpp_python-cpu-${LLAMA_CPP_VERSION}-cp38-cp38-linux_x86_64.whl && \
+    ls -l /build/dists/
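
The final RUN builds the same llama-cpp-python version twice, once with cuBLAS and once CPU-only, and renames the wheels with a -gpu-/-cpu- infix so both fit in /build/dists/. Note the infixed filenames are not valid wheel names, so a consumer would likely need to rename the chosen wheel back to its canonical name before installing. A minimal usage sketch once the wheel is installed (the model path and prompt are placeholders, not from this repo):

# After renaming the chosen wheel back and installing it, e.g.:
#   mv llama_cpp_python-gpu-0.1.50-cp38-cp38-linux_x86_64.whl \
#      llama_cpp_python-0.1.50-cp38-cp38-linux_x86_64.whl
#   pip install llama_cpp_python-0.1.50-cp38-cp38-linux_x86_64.whl
from llama_cpp import Llama

# model_path is a placeholder; any GGML-format model file works here
llm = Llama(model_path="./models/example.ggml.q4_0.bin")
output = llm(
    "Q: What does llama.cpp do? A:",
    max_tokens=64,  # cap the completion length
    stop=["Q:"],    # stop before the model invents another question
)
print(output["choices"][0]["text"])
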
chat.py CHANGED
@@ -90,10 +90,12 @@ with blocks:
     )
     stop.click(fn=None, inputs=None, outputs=None, cancels=[submit_click_event, message_submit_event], queue=False)
 
-    gr.Markdown("""
+    gr.Markdown(f"""
+    - This is the {config["repo"]}/{config["file"]} model.
+    - This Space uses GGML with GPU support, so it can run larger models on smaller GPUs & VRAM quickly.
     - This is running on a smaller, shared GPU, so it may take a few seconds to respond.
     - [Duplicate the Space](https://huggingface.co/spaces/openaccess-ai-collective/ggml-ui?duplicate=true) to skip the queue and run in a private space or to use your own GGML models.
-    - When using your own models, simply update the [./config.yml](./config.yml)")
+    - When using your own models, simply update the [config.yml](https://huggingface.co/spaces/openaccess-ai-collective/ggml-ui/blob/main/config.yml)
     - Contribute at [https://github.com/OpenAccess-AI-Collective/ggml-webui](https://github.com/OpenAccess-AI-Collective/ggml-webui)
     """)
 
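
The f-string added above assumes a `config` mapping with `repo` and `file` keys identifying the hosted model. A hypothetical sketch of the plumbing that would populate those keys (the config.yml schema and the download step are assumptions, not shown in this diff):

# Hypothetical sketch of how config["repo"] / config["file"] could be
# loaded and used; the actual chat.py may differ.
import yaml
from huggingface_hub import hf_hub_download

with open("config.yml") as f:
    config = yaml.safe_load(f)  # assumed shape: {"repo": "<hub repo id>", "file": "<ggml filename>", ...}

# fetch the GGML weights named in the config from the Hugging Face Hub
model_path = hf_hub_download(repo_id=config["repo"], filename=config["file"])
print(f"Serving {config['repo']}/{config['file']} from {model_path}")
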
entrypoint.sh DELETED
@@ -1,6 +0,0 @@
-#!/bin/bash
-
-source /opt/venv/bin/activate
-export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
-
-python3 chat.py