Luke Stanley committed
Commit c355718
1 Parent(s): 74d6e52

Adds Gradio app wrapper and Dockerfile

Files changed (2):
  1. Dockerfile +49 -0
  2. app.py +34 -0
Dockerfile ADDED
@@ -0,0 +1,49 @@
+ ARG CUDA_IMAGE="12.1.1-devel-ubuntu22.04"
+ FROM nvidia/cuda:${CUDA_IMAGE}
+
+ # We need to set the host to 0.0.0.0 to allow outside access
+ ENV HOST=0.0.0.0
+
+ RUN apt-get update && apt-get upgrade -y \
+     && apt-get install -y git build-essential \
+     python3 python3-pip gcc wget \
+     ocl-icd-opencl-dev opencl-headers clinfo \
+     libclblast-dev libopenblas-dev \
+     && mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd
+ COPY . .
+
+ # Set build-related env vars
+ ENV CUDA_DOCKER_ARCH=all
+ ENV LLAMA_CUBLAS=1
+
+ RUN useradd -m -u 1000 user
+ # Switch to the "user" user
+ USER user
+ # Set home to the user's home directory
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH \
+     PYTHONPATH=$HOME/app \
+     PYTHONUNBUFFERED=1 \
+     GRADIO_ALLOW_FLAGGING=never \
+     GRADIO_NUM_PORTS=1 \
+     GRADIO_SERVER_NAME=0.0.0.0 \
+     GRADIO_THEME=huggingface \
+     SYSTEM=spaces
+
+ WORKDIR $HOME/app
+
+ # Copy the current directory contents into the container at $HOME/app, owned by the user
+ COPY --chown=user . $HOME/app
+ # Install dependencies
+ RUN python3 -m pip install --upgrade pip && \
+     python3 -m pip install requests pytest cmake \
+     scikit-build setuptools fastapi uvicorn sse-starlette \
+     pydantic-settings starlette-context gradio huggingface_hub hf_transfer
+
+
+ # Install llama-cpp-python (built with CUDA)
+ RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install git+https://github.com/lukestanley/llama-cpp-python.git@expose_json_grammar_convert_function
+
+ CMD ["python3", "app.py"]
+
+ # Credit to Radamés Ajna <radames@hf.co> for the original Dockerfile
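
With the image defined, it could be built and served locally along these lines (a sketch: the chill-app tag is an assumption, --gpus all requires the NVIDIA Container Toolkit, and 7860 is Gradio's default port):

docker build -t chill-app .
docker run --gpus all -p 7860:7860 chill-app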
app.py ADDED
@@ -0,0 +1,34 @@
+ from os import system as run
+ from subprocess import check_output
+
+ import gradio as gr
+
+ # Without a GPU, we need to re-install llama-cpp-python to avoid an error.
+ # We use a shell command to detect whether an NVIDIA GPU is available:
+ use_gpu = False
+ try:
+     command = "nvidia-debugdump --list | grep Device"
+     output = check_output(command, shell=True).decode()
+     if "NVIDIA" in output and "ID" in output:
+         print("NVIDIA GPU detected.")
+         use_gpu = True
+ except Exception as e:
+     print("No NVIDIA GPU detected, using CPU. GPU check result:", e)
+
+ if use_gpu:
+     print("GPU detected, existing GPU-focused llama-cpp-python build should work.")
+ else:
+     print("Re-installing the non-GPU llama-cpp-python build because no GPU was detected.")
+     run('pip uninstall llama-cpp-python -y')
+     run('pip install git+https://github.com/lukestanley/llama-cpp-python.git@expose_json_grammar_convert_function --upgrade --no-cache-dir --force-reinstall')
+     print("llama-cpp-python re-installed, will now attempt to load.")
+
+ # Now chill can import llama-cpp-python without an error:
+ from chill import improvement_loop
+
+
+ def greet(text):
+     return str(improvement_loop(text))
+
+ demo = gr.Interface(fn=greet, inputs="text", outputs="text")
+ demo.launch(max_threads=1)
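
Once the app is running, the Interface can be exercised from Python with the gradio_client package (a sketch: the localhost URL assumes a local run with port 7860 published, the example input is made up, and /predict is the default endpoint name Gradio assigns to a single gr.Interface):

from gradio_client import Client

client = Client("http://localhost:7860/")
# Send text through the single text-in/text-out endpoint and print the result.
result = client.predict("An example sentence to rephrase.", api_name="/predict")
print(result)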