rm docker implementation, add llama-cpp-python builder github actions, update copy to identify model in ui
- .github/workflows/build-llama-cpp-wheel.yml +58 -0
- Dockerfile +0 -68
- Dockerfile-llama-cpp-wheel +50 -0
- chat.py +4 -2
- entrypoint.sh +0 -6
.github/workflows/build-llama-cpp-wheel.yml
ADDED
@@ -0,0 +1,58 @@
+name: Build wheel in Docker
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - 'Dockerfile-llama-cpp-wheel'
+  release:
+    types: [published]
+
+jobs:
+  build:
+    runs-on: self-hosted
+    permissions:
+      contents: write
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v2
+
+      - name: Build Docker image
+        run: docker build . -t artifact-builder -f Dockerfile-llama-cpp-wheel
+
+      - name: Run Docker container
+        run: docker run --name my-artifact-builder artifact-builder
+
+      - name: Copy GPU & CPU artifact from Docker container
+        run: |
+          docker cp my-artifact-builder:/build/dists/*.whl .
+
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v3
+        with:
+          name: wheels
+          path: |
+            *.whl
+
+  release:
+    needs: build
+    runs-on: self-hosted
+    if: github.event_name == 'release'
+    permissions:
+      contents: write
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v2
+
+      - name: Download artifacts
+        uses: actions/download-artifact@v3
+        with:
+          name: wheels
+
+      - name: Release
+        uses: softprops/action-gh-release@v1
+        with:
+          files: |
+            *.whl
+          token: ${{ secrets.GITHUB_TOKEN }}
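The release job publishes both wheel variants as GitHub release assets. As a rough sketch of the consumer side (the repository name below is an assumption; this diff does not say where the workflow repo lives), the latest GPU wheel could be fetched and installed like so:

# Hedged sketch, Python stdlib only. REPO is hypothetical.
import json
import subprocess
import urllib.request

REPO = "OpenAccess-AI-Collective/ggml-webui"  # assumption: the repo hosting this workflow

with urllib.request.urlopen(f"https://api.github.com/repos/{REPO}/releases/latest") as resp:
    release = json.load(resp)

# The build stage names its artifacts llama_cpp_python-gpu-*.whl / llama_cpp_python-cpu-*.whl
gpu_urls = [a["browser_download_url"] for a in release["assets"]
            if a["name"].startswith("llama_cpp_python-gpu-")]
if gpu_urls:
    subprocess.run(["pip", "install", gpu_urls[0]], check=True)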
Dockerfile
DELETED
@@ -1,68 +0,0 @@
-FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as builder-llamacpp
-
-RUN apt-get update && \
-    apt-get install --no-install-recommends -y git vim build-essential python3 python3-pip python3-dev python3-venv libblas-dev liblapack-dev libopenblas-dev cmake && \
-    rm -rf /var/lib/apt/lists/* && \
-    pip3 install scikit-build
-
-RUN git clone --depth 1 --branch v0.1.49 https://github.com/abetlen/llama-cpp-python.git /build
-RUN git clone https://github.com/ggerganov/llama.cpp.git /build/vendor/llama.cpp
-
-WORKDIR /build
-
-RUN CMAKE_ARGS="-DLLAMA_OPENBLAS=on" FORCE_CMAKE=1 python3 setup.py bdist_wheel
-# dist/llama_cpp_python-0.1.49-cp310-cp310-linux_x86_64.whl
-
-FROM nvidia/cuda:11.8.0-runtime-ubuntu22.04
-
-LABEL maintainer="Wing Lian <wing.lian@gmail.com>"
-
-RUN apt-get update && \
-    apt-get install --no-install-recommends -y \
-    build-essential curl libportaudio2 libasound-dev git python3 python3-pip make g++ \
-    libffi-dev libncurses5-dev zlib1g zlib1g-dev libreadline-dev libbz2-dev libsqlite3-dev libssl-dev \
-    libblas-dev liblapack-dev libopenblas-dev cmake git-lfs && \
-    git lfs install && \
-    rm -rf /var/lib/apt/lists/*
-
-RUN groupadd -g 1000 appuser && \
-    useradd -r -u 1000 -g appuser appuser -m -d /home/appuser
-
-RUN --mount=type=cache,target=/root/.cache/pip pip3 install virtualenv
-RUN mkdir /app
-RUN mkdir -p /opt/venv
-RUN chown -R appuser:appuser /app
-
-WORKDIR /app
-
-RUN virtualenv /opt/venv
-RUN . /opt/venv/bin/activate && \
-    pip3 install --no-cache-dir --upgrade pip setuptools wheel && \
-    pip3 install --no-cache-dir datasets "huggingface-hub>=0.12.1" "protobuf<4" "click<8.1" "bitsandbytes" "gradio" && \
-    pip3 install --no-cache-dir torch torchvision torchaudio
-
-COPY --from=builder-llamacpp /build/dist/llama_cpp_python-0.1.49-cp310-cp310-linux_x86_64.whl /app
-RUN . /opt/venv/bin/activate && \
-    pip3 uninstall llama_cpp_python && \
-    pip3 install /app/llama_cpp_python-0.1.49-cp310-cp310-linux_x86_64.whl && \
-    rm /app/llama_cpp_python-0.1.49-cp310-cp310-linux_x86_64.whl
-
-COPY requirements.txt /app/requirements.txt
-RUN . /opt/venv/bin/activate && \
-    pip3 install --no-cache-dir -r requirements.txt
-
-RUN cp /opt/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda118.so /opt/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so
-
-COPY . /app/
-
-RUN mkdir -p /opt/cache/huggingface/hub
-RUN chown -R appuser:appuser /app && find /app -type d -exec chmod 0755 {} \;
-RUN chown -R appuser:appuser /home/appuser
-RUN chmod +x /app/entrypoint.sh && \
-    chmod +x /app/app.py
-
-ENV TRANSFORMERS_CACHE=/opt/cache/huggingface/hub
-
-USER appuser
-
-ENTRYPOINT ["/app/entrypoint.sh"]
Dockerfile-llama-cpp-wheel
ADDED
@@ -0,0 +1,50 @@
+FROM nvidia/cuda:11.3.1-cudnn8-devel-ubuntu18.04
+
+ARG LLAMA_CPP_VERSION="0.1.50"
+ARG CMAKE_VERSION=3.26
+ARG CMAKE_VERSION_PATCH=3.26.3
+ARG CMAKE_OS=linux
+ARG DEBIAN_FRONTEND=noninteractive
+ENV TZ=UTC
+
+RUN apt-get update && \
+    apt-get install --no-install-recommends -y \
+    curl git vim build-essential software-properties-common python3 python3-pip python3-dev python3-venv \
+    libffi-dev libncurses5-dev zlib1g zlib1g-dev libreadline-dev libbz2-dev libsqlite3-dev libssl-dev \
+    libblas-dev liblapack-dev libopenblas-dev cmake && \
+    add-apt-repository ppa:ubuntu-toolchain-r/test && \
+    apt-get update && \
+    apt install --no-install-recommends -y gcc-10 g++-10 && \
+    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 100 --slave /usr/bin/g++ g++ /usr/bin/g++-10 --slave /usr/bin/gcov gcov /usr/bin/gcov-10 && \
+    rm -rf /var/lib/apt/lists/* && \
+    pip3 install scikit-build
+RUN curl -L https://cmake.org/files/v$CMAKE_VERSION/cmake-$CMAKE_VERSION_PATCH-$CMAKE_OS-x86_64.sh -o /tmp/cmake-$CMAKE_VERSION_PATCH-$CMAKE_OS-x86_64.sh && \
+    mkdir /opt/cmake && \
+    sh /tmp/cmake-$CMAKE_VERSION_PATCH-$CMAKE_OS-x86_64.sh --skip-license --prefix=/opt/cmake && \
+    ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake
+
+RUN useradd -m -u 1000 appuser
+
+WORKDIR /build
+RUN chown appuser:appuser /build
+USER appuser
+
+ENV HOME /home/appuser
+ENV PYENV_ROOT $HOME/.pyenv
+ENV PATH $PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH
+
+RUN git clone --depth 1 --branch v$LLAMA_CPP_VERSION https://github.com/abetlen/llama-cpp-python.git /build
+RUN git clone https://github.com/ggerganov/llama.cpp.git /build/vendor/llama.cpp
+RUN curl https://pyenv.run | bash
+
+RUN pyenv install 3.8.9 && \
+    pyenv global 3.8.9 && \
+    pyenv rehash && \
+    pip install --no-cache-dir --upgrade pip==22.3.1 setuptools wheel && \
+    pip install --no-cache-dir datasets "huggingface-hub>=0.12.1" "protobuf<4" "click<8.1" "scikit-build" && \
+    CMAKE_ARGS="-DLLAMA_CUBLAS=on -DLLAMA_OPENBLAS=off" FORCE_CMAKE=1 python3 setup.py bdist_wheel && \
+    mkdir /build/dists/ && \
+    cp dist/llama_cpp_python-${LLAMA_CPP_VERSION}-cp38-cp38-linux_x86_64.whl dists/llama_cpp_python-gpu-${LLAMA_CPP_VERSION}-cp38-cp38-linux_x86_64.whl && \
+    CMAKE_ARGS="-DLLAMA_CUBLAS=off -DLLAMA_OPENBLAS=off" FORCE_CMAKE=1 python3 setup.py bdist_wheel && \
+    cp dist/llama_cpp_python-${LLAMA_CPP_VERSION}-cp38-cp38-linux_x86_64.whl dists/llama_cpp_python-cpu-${LLAMA_CPP_VERSION}-cp38-cp38-linux_x86_64.whl && \
+    ls -l /build/dists/
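Once one of these wheels is installed, a minimal smoke test is just loading a GGML model and running a short completion. This is a sketch; the model path below is a placeholder, not a file shipped with this repo:

# Minimal smoke test for the built llama-cpp-python wheel.
# The model path is hypothetical -- point it at any local GGML file.
from llama_cpp import Llama

llm = Llama(model_path="./models/ggml-model-q4_0.bin")
out = llm("Q: Name the planets in the solar system. A:", max_tokens=32)
print(out["choices"][0]["text"])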
chat.py
CHANGED
@@ -90,10 +90,12 @@ with blocks:
     )
     stop.click(fn=None, inputs=None, outputs=None, cancels=[submit_click_event, message_submit_event], queue=False)
 
-    gr.Markdown("""
+    gr.Markdown(f"""
+        - This is the {config["repo"]}/{config["file"]} model.
+        - This Space uses GGML with GPU support, so it can run larger models on smaller GPUs & VRAM quickly.
         - This is running on a smaller, shared GPU, so it may take a few seconds to respond.
         - [Duplicate the Space](https://huggingface.co/spaces/openaccess-ai-collective/ggml-ui?duplicate=true) to skip the queue and run in a private space or to use your own GGML models.
-        - When using your own models, simply update the [
+        - When using your own models, simply update the [config.yml](https://huggingface.co/spaces/openaccess-ai-collective/ggml-ui/blob/main/config.yml)")
         - Contribute at [https://github.com/OpenAccess-AI-Collective/ggml-webui](https://github.com/OpenAccess-AI-Collective/ggml-webui)
         """)
 
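The new copy reads config["repo"] and config["file"], so config.yml is assumed to carry at least those two keys (the Hugging Face model repo and the GGML filename). A sketch of the lookup as chat.py presumably performs it:

# Sketch: how the model identity shown in the UI copy is resolved.
# Assumes config.yml has `repo` and `file` keys, as the f-string implies.
import yaml

with open("config.yml") as f:
    config = yaml.safe_load(f)

print(f'{config["repo"]}/{config["file"]}')  # the string rendered into the Markdown block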
entrypoint.sh
DELETED
@@ -1,6 +0,0 @@
-#!/bin/bash
-
-source /opt/venv/bin/activate
-export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
-
-python3 chat.py