limcheekin committed on
Commit
2516e02
0 Parent(s):

Duplicate from limcheekin/orca_mini_v3_13B-GGML

Files changed (6)
  1. .gitattributes +35 -0
  2. Dockerfile +35 -0
  3. README.md +20 -0
  4. index.html +37 -0
  5. main.py +28 -0
  6. start_server.sh +6 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,35 @@
+ # Grab a fresh copy of the Python image
+ FROM python:3.10-slim
+
+ # Install build and runtime dependencies
+ RUN apt-get update && \
+     apt-get install -y \
+         libopenblas-dev \
+         ninja-build \
+         build-essential \
+         pkg-config \
+         curl
+
+ RUN pip install -U pip setuptools wheel && \
+     CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" FORCE_CMAKE=1 pip install --verbose llama-cpp-python[server]
+
+ # Download the quantized GGML model from the Hugging Face Hub
+ RUN mkdir model && \
+     curl -L https://huggingface.co/TheBloke/orca_mini_v3_13B-GGML/resolve/main/orca_mini_v3_13b.ggmlv3.q5_K_S.bin -o model/ggmlv3-model.bin
+
+ COPY ./start_server.sh ./
+ COPY ./main.py ./
+ COPY ./index.html ./
+
+ # Make the server start script executable
+ RUN chmod +x ./start_server.sh
+
+ # Set environment variables for the server host and port
+ ENV HOST=0.0.0.0
+ ENV PORT=7860
+
+ # Expose a port for the server
+ EXPOSE ${PORT}
+
+ # Run the server start script
+ CMD ["/bin/sh", "./start_server.sh"]
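
To try the image locally, here is a minimal sketch; the image tag orca-mini-ggml and the docker invocation are assumptions for illustration, not part of this commit:

    # Build the image from the directory containing this Dockerfile
    docker build -t orca-mini-ggml .
    # Run it, publishing the exposed port 7860 on the host
    docker run --rm -p 7860:7860 orca-mini-ggml

Once the model has loaded, the landing page should be reachable at http://localhost:7860/ and the interactive API documentation at http://localhost:7860/docs.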
README.md ADDED
@@ -0,0 +1,20 @@
+ ---
+ title: orca_mini_v3_13B-GGML (q5_K_S)
+ colorFrom: purple
+ colorTo: blue
+ sdk: docker
+ models:
+ - TheBloke/orca_mini_v3_13B-GGML
+ tags:
+ - inference api
+ - openai-api compatible
+ - llama-cpp-python
+ - orca_mini_v3_13B
+ - ggml
+ pinned: false
+ duplicated_from: limcheekin/orca_mini_v3_13B-GGML
+ ---
+
+ # orca_mini_v3_13B-GGML (q5_K_S)
+
+ Please refer to [index.html](index.html) for more information.
index.html ADDED
@@ -0,0 +1,37 @@
+ <!DOCTYPE html>
+ <html>
+   <head>
+     <title>orca_mini_v3_13B-GGML (q5_K_S)</title>
+   </head>
+   <body>
+     <h1>orca_mini_v3_13B-GGML (q5_K_S)</h1>
+     <p>
+       Using the
+       <a href="https://github.com/abetlen/llama-cpp-python">llama-cpp-python</a>
+       package, this Hugging Face Docker Space hosts the GGML model and makes
+       it accessible through an OpenAI-compatible API. The Space includes
+       comprehensive API documentation to facilitate seamless
+       integration.
+     </p>
+     <ul>
+       <li>
+         The API endpoint:
+         <a href="https://limcheekin-orca-mini-v3-13b-ggml.hf.space/v1"
+           >https://limcheekin-orca-mini-v3-13b-ggml.hf.space/v1</a
+         >
+       </li>
+       <li>
+         The API doc:
+         <a href="https://limcheekin-orca-mini-v3-13b-ggml.hf.space/docs"
+           >https://limcheekin-orca-mini-v3-13b-ggml.hf.space/docs</a
+         >
+       </li>
+     </ul>
+     <p>
+       If you find this resource valuable, please consider starring the Space.
+       Your support plays a vital role in furthering the application for a
+       community GPU grant, ultimately enhancing the capabilities and
+       accessibility of this Space.
+     </p>
+   </body>
+ </html>
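
As a quick check of the OpenAI-compatible endpoint described above, a hedged curl sketch (the prompt and parameters are illustrative only; the exact request schema is defined by the llama-cpp-python server and documented at the /docs link):

    curl https://limcheekin-orca-mini-v3-13b-ggml.hf.space/v1/chat/completions \
      -H "Content-Type: application/json" \
      -d '{"messages": [{"role": "user", "content": "Say hello in one sentence."}], "max_tokens": 64}'

The /v1/chat/completions route follows the OpenAI chat completions format; /v1/completions and /v1/models are exposed by the same server.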
main.py ADDED
@@ -0,0 +1,28 @@
+ from llama_cpp.server.app import create_app, Settings
+ from fastapi.responses import HTMLResponse
+ import os
+
+ print("os.cpu_count()", os.cpu_count())
+ app = create_app(
+     Settings(
+         n_threads=os.cpu_count(),
+         model="model/ggmlv3-model.bin",
+         embedding=False
+     )
+ )
+
+ # Read the content of index.html once and store it in memory
+ with open("index.html", "r") as f:
+     content = f.read()
+
+
+ @app.get("/", response_class=HTMLResponse)
+ async def read_items():
+     return content
+
+ if __name__ == "__main__":
+     import uvicorn
+     uvicorn.run(app,
+                 host=os.environ["HOST"],
+                 port=int(os.environ["PORT"])
+                 )
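
For running main.py outside the container, a rough sketch under the assumption that the quantized model file already sits at model/ggmlv3-model.bin and that HOST and PORT are set as in the Dockerfile:

    pip install "llama-cpp-python[server]"
    HOST=0.0.0.0 PORT=7860 python3 -B main.py

Note that main.py reads HOST and PORT from the environment only in the __main__ block; in the Space itself the server is started through start_server.sh below, which runs main.py the same way.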
start_server.sh ADDED
@@ -0,0 +1,6 @@
+ #!/bin/sh
+
+ # For mlock support
+ ulimit -l unlimited
+
+ python3 -B main.py
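
One caveat on the mlock line: ulimit -l unlimited only succeeds when the process is allowed to raise its locked-memory limit; otherwise the shell prints an error and the script simply continues without it. When running the container locally, a hedged sketch of granting the limit (flag values are assumptions, not part of this commit):

    docker run --rm -p 7860:7860 --ulimit memlock=-1:-1 orca-mini-ggml

Inside a Hugging Face Space, the effective limit is whatever the platform grants.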