Severian committed on
Commit
5b800e8
·
1 Parent(s): 22e0496

Upload 2 files

Browse files
Files changed (2) hide show
  1. Dockerfile +47 -0
  2. app.py +84 -0
Dockerfile ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ARG CUDA_IMAGE="12.1.1-devel-ubuntu22.04"
2
+ FROM nvidia/cuda:${CUDA_IMAGE}
3
+
4
+ # We need to set the host to 0.0.0.0 to allow outside access
5
+ ENV HOST 0.0.0.0
6
+
7
+ RUN apt-get update && apt-get upgrade -y \
8
+ && apt-get install -y git build-essential \
9
+ python3 python3-pip gcc wget \
10
+ ocl-icd-opencl-dev opencl-headers clinfo \
11
+ libclblast-dev libopenblas-dev \
12
+ && mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd
13
+
14
+ COPY . .
15
+
16
+ # setting build related env vars
17
+ ENV CUDA_DOCKER_ARCH=all
18
+ ENV LLAMA_CUBLAS=1
19
+
20
+ # Install depencencies
21
+ RUN python3 -m pip install --upgrade pip pytest cmake \
22
+ scikit-build setuptools fastapi uvicorn sse-starlette \
23
+ pydantic-settings starlette-context gradio huggingface_hub hf_transfer
24
+
25
+ # Install llama-cpp-python (build with cuda)
26
+ RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python
27
+
28
+ RUN useradd -m -u 1000 user
29
+ # Switch to the "user" user
30
+ USER user
31
+ # Set home to the user's home directory
32
+ ENV HOME=/home/user \
33
+ PATH=/home/user/.local/bin:$PATH \
34
+ PYTHONPATH=$HOME/app \
35
+ PYTHONUNBUFFERED=1 \
36
+ GRADIO_ALLOW_FLAGGING=never \
37
+ GRADIO_NUM_PORTS=1 \
38
+ GRADIO_SERVER_NAME=0.0.0.0 \
39
+ GRADIO_THEME=huggingface \
40
+ SYSTEM=spaces
41
+
42
+ WORKDIR $HOME/app
43
+
44
+ # Copy the current directory contents into the container at $HOME/app setting the owner to the user
45
+ COPY --chown=user . $HOME/app
46
+
47
+ CMD ["python3", "app.py"]
app.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import copy
4
+ import time
5
+ import llama_cpp
6
+ from llama_cpp import Llama
7
+ from huggingface_hub import hf_hub_download
8
+
9
+
10
+ llm = Llama(
11
+ model_path=hf_hub_download(
12
+ repo_id=os.environ.get("REPO_ID", "Severian/ANIMA-Phi-Neptune-Mistral-7B-gguf"),
13
+ filename=os.environ.get("MODEL_FILE", "ANIMA-Phi-Neptune-Mistral-7B-gguf"),
14
+ ),
15
+ n_ctx=2048,
16
+ n_gpu_layers=50, # change n_gpu_layers if you have more or less VRAM
17
+ )
18
+
19
+ history = []
20
+
21
+ system_message = """
22
+ Your name is ANIMA, an Advanced Nature Inspired Multidisciplinary Assistant, and a leading expert "
23
+ "in biomimicry, biology, engineering, industrial design, environmental science, physiology, and paleontology. "
24
+ "Your goal is to help the user work in a step-by-step way through the Biomimicry Design Process to propose "
25
+ "biomimetic solutions to a challenge."
26
+ "Nature's Unifying Patterns:"
27
+ "Nature uses only the energy it needs and relies on freely available energy."
28
+ "Nature recycles all materials."
29
+ "Nature is resilient to disturbances."
30
+ "Nature tends to optimize rather than maximize."
31
+ "Nature provides mutual benefits."
32
+ "Nature runs on information."
33
+ "Nature uses chemistry and materials that are safe for living beings."
34
+ "Nature builds using abundant resources, incorporating rare resources only sparingly."
35
+ "Nature is locally attuned and responsive."
36
+ "Nature uses shape to determine functionality.
37
+ """
38
+
39
+
40
+ def generate_text(message, history):
41
+ temp = ""
42
+ input_prompt = f"[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n "
43
+ for interaction in history:
44
+ input_prompt = input_prompt + str(interaction[0]) + " [/INST] " + str(interaction[1]) + " </s><s> [INST] "
45
+
46
+ input_prompt = input_prompt + str(message) + " [/INST] "
47
+
48
+ output = llm(
49
+ input_prompt,
50
+ temperature=0.15,
51
+ top_p=0.1,
52
+ top_k=40,
53
+ repeat_penalty=1.1,
54
+ max_tokens=1024,
55
+ stop=[
56
+ "<|prompter|>",
57
+ "<|endoftext|>",
58
+ "<|endoftext|> \n",
59
+ "ASSISTANT:",
60
+ "USER:",
61
+ "SYSTEM:",
62
+ ],
63
+ stream=True,
64
+ )
65
+ for out in output:
66
+ stream = copy.deepcopy(out)
67
+ temp += stream["choices"][0]["text"]
68
+ yield temp
69
+
70
+ history = ["init", input_prompt]
71
+
72
+
73
+ demo = gr.ChatInterface(
74
+ generate_text,
75
+ title="A N I M A",
76
+ description="ANIMA is an expert in various scientific disciplines.",
77
+ examples=["tell me everything about biomimicry"],
78
+ cache_examples=True,
79
+ retry_btn=None,
80
+ undo_btn="Delete Previous",
81
+ clear_btn="Clear",
82
+ )
83
+ demo.queue(concurrency_count=1, max_size=5)
84
+ demo.launch()