ztime committed on
Commit
8fd0c06
1 Parent(s): a9adc36

add llama.cpp server

Browse files
Files changed (3) hide show
  1. Dockerfile +8 -1
  2. index.html +6 -6
  3. start_server.sh +5 -1
Dockerfile CHANGED
@@ -8,11 +8,18 @@ RUN apt-get update && \
8
  ninja-build \
9
  build-essential \
10
  pkg-config \
11
- curl
12
 
13
  RUN pip install -U pip setuptools wheel && \
14
  CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" FORCE_CMAKE=1 pip install --verbose llama-cpp-python[server]
15
 
 
 
 
 
 
 
 
16
  # Download model
17
  RUN mkdir model && \
18
  curl -L https://huggingface.co/TheBloke/openchat-3.5-0106-GGUF/resolve/main/openchat-3.5-0106.Q4_K_M.gguf -o model/gguf-model.bin
 
8
  ninja-build \
9
  build-essential \
10
  pkg-config \
11
+ curl cmake git
12
 
13
  RUN pip install -U pip setuptools wheel && \
14
  CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" FORCE_CMAKE=1 pip install --verbose llama-cpp-python[server]
15
 
16
+
17
+ RUN git clone https://github.com/ggerganov/llama.cpp.git llamacpp --depth 1 && \
18
+ cd llamacpp && \
19
+ cmake -B build && \
20
+ cmake --build build --config Release main server && \
21
+ cp build/bin/* ~/
22
+
23
  # Download model
24
  RUN mkdir model && \
25
  curl -L https://huggingface.co/TheBloke/openchat-3.5-0106-GGUF/resolve/main/openchat-3.5-0106.Q4_K_M.gguf -o model/gguf-model.bin
index.html CHANGED
@@ -1,10 +1,10 @@
1
  <!DOCTYPE html>
2
  <html>
3
  <head>
4
- <title>OpenHermes-2.5-Mistral-7B-GGUF (Q4_K_M)</title>
5
  </head>
6
  <body>
7
- <h1>OpenHermes-2.5-Mistral-7B-GGUF (Q4_K_M)</h1>
8
  <p>
9
  With the utilization of the
10
  <a href="https://github.com/abetlen/llama-cpp-python">llama-cpp-python</a>
@@ -16,15 +16,15 @@
16
  <ul>
17
  <li>
18
  The API endpoint:
19
- <a href="https://limcheekin-openhermes-2-5-mistral-7b-gguf.hf.space/v1"
20
- >https://limcheekin-openhermes-2-5-mistral-7b-gguf.hf.space/v1</a
21
  >
22
  </li>
23
  <li>
24
  The API doc:
25
  <a
26
- href="https://limcheekin-openhermes-2-5-mistral-7b-gguf.hf.space/docs"
27
- >https://limcheekin-openhermes-2-5-mistral-7b-gguf.hf.space/docs</a
28
  >
29
  </li>
30
  </ul>
 
1
  <!DOCTYPE html>
2
  <html>
3
  <head>
4
+ <title>openchat-3.5-0106-GGUF (Q4_K_M)</title>
5
  </head>
6
  <body>
7
+ <h1>openchat-3.5-0106-GGUF (Q4_K_M)</h1>
8
  <p>
9
  With the utilization of the
10
  <a href="https://github.com/abetlen/llama-cpp-python">llama-cpp-python</a>
 
16
  <ul>
17
  <li>
18
  The API endpoint:
19
+ <a href="https://ztime-openchat.hf.space/v1"
20
+ >https://ztime-openchat.hf.space/v1</a
21
  >
22
  </li>
23
  <li>
24
  The API doc:
25
  <a
26
+ href="https://ztime-openchat.hf.space/docs"
27
+ >https://ztime-openchat.hf.space/docs</a
28
  >
29
  </li>
30
  </ul>
start_server.sh CHANGED
@@ -3,4 +3,8 @@
3
  # For mlock support
4
  ulimit -l unlimited
5
 
6
- python3 -B main.py
 
 
 
 
 
3
  # For mlock support
4
  ulimit -l unlimited
5
 
6
+
7
+
8
+ ~/server --port 7890 -m model/gguf-model.bin
9
+ ./llamacpp/build/bin/server --port 7890 -m model/gguf-model.bin
10
+ # python3 -B main.py