gingdev committed
Commit
730525f
1 Parent(s): 98b6c82
Files changed (4)
  1. .gitignore +1 -0
  2. Dockerfile +28 -0
  3. README.md +9 -0
  4. app.py +71 -0
.gitignore ADDED
@@ -0,0 +1 @@
+ *.gguf
Dockerfile ADDED
@@ -0,0 +1,28 @@
+ FROM python:latest
+
+ RUN apt update && \
+     apt install -y wget
+
+ # https://huggingface.co/docs/hub/spaces-sdks-docker-first-demo
+ RUN useradd -m -u 1000 user
+
+ USER user
+
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH \
+     FORWARDED_ALLOW_IPS=* \
+     HOST=0.0.0.0
+
+ WORKDIR $HOME/app
+
+ COPY --chown=user app.py .
+
+ RUN CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" pip install llama-cpp-python[server]==0.2.55 huggingface_hub[cli,hf_transfer] httptools
+
+ RUN HF_HUB_ENABLE_HF_TRANSFER=1 \
+     huggingface-cli download gingdev/ictu-vinallama-gguf ictu.gguf --local-dir . --local-dir-use-symlinks=True
+
+ EXPOSE 8000
+
+ ENTRYPOINT [ "python" ]
+ CMD [ "app.py", "--model", "ictu.gguf" ]
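
The image bakes the quantized model into the build and serves it through llama-cpp-python's OpenAI-compatible HTTP API on the exposed port 8000. A minimal client sketch, assuming the container is reachable at http://localhost:8000 (the prompt and max_tokens value are illustrative; only the standard library is used):

```python
import json
import urllib.request

# llama-cpp-python's server exposes an OpenAI-compatible REST API,
# so a plain POST to /v1/chat/completions is enough to chat with the model.
req = urllib.request.Request(
    "http://localhost:8000/v1/chat/completions",
    data=json.dumps(
        {
            "messages": [{"role": "user", "content": "Xin chào!"}],
            "max_tokens": 128,  # illustrative generation cap
        }
    ).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)

with urllib.request.urlopen(req) as resp:
    body = json.load(resp)

# The response follows the OpenAI chat-completion schema.
print(body["choices"][0]["message"]["content"])
```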
README.md CHANGED
@@ -1 +1,10 @@
+ ---
+ title: python-llama-cpp
+ emoji: 😍
+ colorFrom: green
+ colorTo: green
+ sdk: docker
+ app_port: 8000
+ pinned: true
+ ---
  # hf-space-python-llama-cpp
app.py ADDED
@@ -0,0 +1,71 @@
+ # Copied from https://github.com/abetlen/llama-cpp-python/blob/main/llama_cpp/server/__main__.py
+ # pyright: reportAssignmentType=false
+ from __future__ import annotations
+
+ import os
+ import sys
+ import argparse
+
+ import uvicorn
+
+ from llama_cpp.server.app import create_app
+ from llama_cpp.server.settings import (
+     Settings,
+     ServerSettings,
+     ModelSettings,
+     ConfigFileSettings,
+ )
+ from llama_cpp.server.cli import add_args_from_model, parse_model_from_args
+
+
+ def main():
+     description = "🦙 Llama.cpp python server. Host your own LLMs!🚀"
+     parser = argparse.ArgumentParser(description=description)
+
+     add_args_from_model(parser, Settings)
+     parser.add_argument(
+         "--config_file",
+         type=str,
+         help="Path to a config file to load.",
+     )
+     server_settings: ServerSettings | None = None
+     model_settings: list[ModelSettings] = []
+     args = parser.parse_args()
+     try:
+         # Load server settings from config_file if provided
+         config_file = os.environ.get("CONFIG_FILE", args.config_file)
+         if config_file:
+             if not os.path.exists(config_file):
+                 raise ValueError(f"Config file {config_file} not found!")
+             with open(config_file, "rb") as f:
+                 config_file_settings = ConfigFileSettings.model_validate_json(f.read())
+                 server_settings = ServerSettings.model_validate(config_file_settings)
+                 model_settings = config_file_settings.models
+         else:
+             server_settings = parse_model_from_args(ServerSettings, args)
+             model_settings = [parse_model_from_args(ModelSettings, args)]
+     except Exception as e:
+         print(e, file=sys.stderr)
+         parser.print_help()
+         sys.exit(1)
+     assert server_settings is not None
+     assert model_settings is not None
+     app = create_app(
+         server_settings=server_settings,
+         model_settings=model_settings,
+     )
+
+     # Remove X-Request-Id middleware: https://github.com/abetlen/llama-cpp-python/issues/1337
+     app.user_middleware.pop()
+
+     uvicorn.run(
+         app,
+         host=os.getenv("HOST", server_settings.host),
+         port=int(os.getenv("PORT", server_settings.port)),
+         ssl_keyfile=server_settings.ssl_keyfile,
+         ssl_certfile=server_settings.ssl_certfile,
+     )
+
+
+ if __name__ == "__main__":
+     main()
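
Besides CLI flags, the launcher above also accepts a JSON config file through --config_file or the CONFIG_FILE environment variable, in which case per-model settings come from the file's models list instead of the command line. A sketch of writing and using such a file, assuming the llama-cpp-python 0.2.55 field names (host, port, models, model, model_alias, n_ctx); the alias and context size are illustrative:

```python
import json

# Server-level fields (ServerSettings) sit at the top level;
# ConfigFileSettings adds a "models" list of ModelSettings entries.
config = {
    "host": "0.0.0.0",
    "port": 8000,
    "models": [
        {
            "model": "ictu.gguf",
            "model_alias": "ictu",  # illustrative alias for clients to request
            "n_ctx": 2048,          # illustrative context window
        }
    ],
}

with open("config.json", "w") as f:
    json.dump(config, f, indent=2)

# Either of these picks it up:
#   python app.py --config_file config.json
#   CONFIG_FILE=config.json python app.py
```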