# Spaces: Running on A10G
# Base image: NVIDIA CUDA 11.8.0 with cuDNN 8 ("devel" variant) on Ubuntu 22.04.
FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04

# Keep apt/debconf non-interactive while the image is being built.
# Declared as ARG rather than ENV so the setting applies to RUN steps in this
# stage but is not persisted into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive
# Install OS packages in a single layer:
#   - general tooling (curl, ffmpeg, git, git-lfs, wget)
#   - the Python build dependencies (compiler toolchain and -dev headers)
#   - nvidia-driver-515
# The apt package index is removed in the same layer so it does not end up in
# the image.
# NOTE(review): `apt-get upgrade` is kept from the original; hadolint (DL3005)
# prefers bumping the base image tag instead - confirm which is wanted.
# NOTE(review): nvidia-driver-515 is kept from the original; GPU drivers are
# usually supplied by the host's container runtime - confirm it is needed.
RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get install -y --no-install-recommends \
        curl \
        ffmpeg \
        git \
        git-lfs \
        wget \
        # python build dependencies
        build-essential \
        libbz2-dev \
        libffi-dev \
        liblzma-dev \
        libncursesw5-dev \
        libreadline-dev \
        libsqlite3-dev \
        libssl-dev \
        libxml2-dev \
        libxmlsec1-dev \
        tk-dev \
        xz-utils \
        zlib1g-dev \
        # NVIDIA driver userspace
        nvidia-driver-515 && \
    rm -rf /var/lib/apt/lists/*
# Create an unprivileged account named "user" with UID 1000 and a home
# directory, then run every following instruction as that account.
RUN useradd --create-home --uid 1000 user
USER user

# Point HOME at the new account's home directory and put its per-user bin
# directory first on PATH.
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:${PATH}

# All relative paths in later instructions resolve against the app directory.
WORKDIR ${HOME}/app
# Install pyenv with its installer script.
# Run through bash with `pipefail` so that a failed download fails the build
# instead of piping an empty or partial script into bash; `curl -fsSL` makes
# HTTP errors fatal, follows redirects, and stays quiet except for errors.
# NOTE(review): the installer is fetched unpinned and without a checksum.
RUN ["/bin/bash", "-o", "pipefail", "-c", "curl -fsSL https://pyenv.run | bash"]

# Expose the pyenv executable and the shims it generates.
ENV PATH=${HOME}/.pyenv/shims:${HOME}/.pyenv/bin:${PATH}

# Python version installed through pyenv; override with --build-arg.
ARG PYTHON_VERSION=3.10.13

# Install the requested Python, make it the pyenv global default, refresh the
# shims, upgrade the packaging tools, then install the application's Python
# dependencies. The pip cache is disabled so it is not stored in the layer.
# NOTE(review): most of these packages are not version-pinned.
RUN pyenv install "${PYTHON_VERSION}" && \
    pyenv global "${PYTHON_VERSION}" && \
    pyenv rehash && \
    pip install --no-cache-dir -U pip setuptools wheel && \
    pip install --no-cache-dir \
        "APScheduler" \
        "gradio[oauth]>=4.28.0" \
        "gradio_huggingfacehub_search==0.0.7" \
        "hf-transfer" \
        "huggingface-hub"
# Copy the build context into the app directory, owned by UID 1000.
COPY --chown=1000 . ${HOME}/app

# Fetch llama.cpp into ./llama.cpp (relative to the current WORKDIR).
# NOTE(review): the clone is not pinned to a tag or commit, so each rebuild
# may pick up a different revision.
RUN git clone https://github.com/ggerganov/llama.cpp

# Install the Python requirements shipped with llama.cpp without keeping the
# pip download cache in the layer.
RUN pip install --no-cache-dir -r llama.cpp/requirements.txt

# Place the imatrix calibration file inside the llama.cpp checkout, owned by
# UID 1000 like the files copied above.
COPY --chown=1000 imatrix_calibration.txt ${HOME}/app/llama.cpp/
# Runtime environment for the container, grouped by prefix:
#   PYTHON*  - Python import path and output buffering
#   HF_*     - Hugging Face Hub client setting
#   GRADIO_* - Gradio server settings
#   TQDM_*   - tqdm progress-bar settings
#   SYSTEM   - set to "spaces"
#   LD_LIBRARY_PATH / PATH - prepend the CUDA library and NVIDIA binary
#                            directories to the existing values
ENV PYTHONPATH=${HOME}/app \
    PYTHONUNBUFFERED=1 \
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    GRADIO_ALLOW_FLAGGING=never \
    GRADIO_NUM_PORTS=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_THEME=huggingface \
    TQDM_POSITION=-1 \
    TQDM_MININTERVAL=1 \
    SYSTEM=spaces \
    LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH} \
    PATH=/usr/local/nvidia/bin:${PATH}
# At container start: build the llama.cpp `quantize`, `gguf-split` and
# `imatrix` targets with LLAMA_CUDA=1, then hand over to start.sh.
# `make -C` builds inside llama.cpp without changing the shell's directory, so
# start.sh still runs from the working directory. `exec` replaces the bash
# wrapper with the start script so stop signals are delivered to it directly.
ENTRYPOINT ["/bin/bash", "-c", "LLAMA_CUDA=1 make -C llama.cpp -j quantize gguf-split imatrix && exec /bin/sh start.sh"]