Spaces:

gingdev
/

experiment

Paused

App Files Files Community

gingdev committed on Apr 9

Commit

730525f

•

1 Parent(s): 98b6c82

update

Browse files

Files changed (4) hide show

.gitignore +1 -0
Dockerfile +28 -0
README.md +9 -0
app.py +71 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ *.gguf

Dockerfile ADDED Viewed

	@@ -0,0 +1,28 @@

+# Image for a Hugging Face Docker Space that serves a GGUF model through app.py.
+# NOTE(review): "latest" is an unpinned tag, so rebuilds may pick up a different Python version.
+FROM python:latest
+# NOTE(review): wget is installed here but no later step in this file invokes it - confirm it is needed.
+RUN apt update && \
+	apt install -y wget
+# https://huggingface.co/docs/hub/spaces-sdks-docker-first-demo
+# Create a non-root user with UID 1000 and a home directory, then switch to it.
+RUN useradd -m -u 1000 user
+USER user
+# HOST is read by app.py (os.getenv("HOST", ...)) to choose the bind address.
+# FORWARDED_ALLOW_IPS is presumably consumed by uvicorn for proxy headers - TODO confirm.
+ENV HOME=/home/user \
+	PATH=/home/user/.local/bin:$PATH \
+	FORWARDED_ALLOW_IPS=* \
+	HOST=0.0.0.0
+WORKDIR $HOME/app
+COPY --chown=user app.py .
+# Install llama-cpp-python (server extra) with BLAS/OpenBLAS CMake flags, plus the Hugging Face CLI and httptools.
+RUN CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" pip install llama-cpp-python[server]==0.2.55 huggingface_hub[cli,hf_transfer] httptools
+# Download ictu.gguf from the gingdev/ictu-vinallama-gguf repo into the working directory at build time.
+RUN HF_HUB_ENABLE_HF_TRANSFER=1 \
+	huggingface-cli download gingdev/ictu-vinallama-gguf ictu.gguf --local-dir . --local-dir-use-symlinks=True
+# Same port as app_port in the README.md front matter.
+EXPOSE 8000
+# Default command: python app.py --model ictu.gguf
+ENTRYPOINT [ "python" ]
+CMD [ "app.py", "--model", "ictu.gguf" ]

README.md CHANGED Viewed

	@@ -1 +1,10 @@









1	# hf-space-python-llama-cpp

+---
+title: python-llama-cpp
+emoji: 😍
+colorFrom: green
+colorTo: green
+sdk: docker
+app_port: 8000
+pinned: true
+---
 # hf-space-python-llama-cpp

app.py ADDED Viewed

	@@ -0,0 +1,71 @@

+# Copied from https://github.com/abetlen/llama-cpp-python/blob/main/llama_cpp/server/__main__.py
+# pyright: reportAssignmentType=false
+from __future__ import annotations
+import os
+import sys
+import argparse
+import uvicorn
+from llama_cpp.server.app import create_app
+from llama_cpp.server.settings import (
+    Settings,
+    ServerSettings,
+    ModelSettings,
+    ConfigFileSettings,
+)
+from llama_cpp.server.cli import add_args_from_model, parse_model_from_args
+def main():
+    description = "🦙 Llama.cpp python server. Host your own LLMs!🚀"
+    parser = argparse.ArgumentParser(description=description)
+    add_args_from_model(parser, Settings)
+    parser.add_argument(
+        "--config_file",
+        type=str,
+        help="Path to a config file to load.",
+    )
+    server_settings: ServerSettings | None = None
+    model_settings: list[ModelSettings] = []
+    args = parser.parse_args()
+    try:
+        # Load server settings from config_file if provided
+        config_file = os.environ.get("CONFIG_FILE", args.config_file)
+        if config_file:
+            if not os.path.exists(config_file):
+                raise ValueError(f"Config file {config_file} not found!")
+            with open(config_file, "rb") as f:
+                config_file_settings = ConfigFileSettings.model_validate_json(f.read())
+                server_settings = ServerSettings.model_validate(config_file_settings)
+                model_settings = config_file_settings.models
+        else:
+            server_settings = parse_model_from_args(ServerSettings, args)
+            model_settings = [parse_model_from_args(ModelSettings, args)]
+    except Exception as e:
+        print(e, file=sys.stderr)
+        parser.print_help()
+        sys.exit(1)
+    assert server_settings is not None
+    assert model_settings is not None
+    app = create_app(
+        server_settings=server_settings,
+        model_settings=model_settings,
+    )
+    # Remove X-Request-Id plugin: https://github.com/abetlen/llama-cpp-python/issues/1337
+    app.user_middleware.pop()
+    uvicorn.run(
+        app,
+        host=os.getenv("HOST", server_settings.host),
+        port=int(os.getenv("PORT", server_settings.port)),
+        ssl_keyfile=server_settings.ssl_keyfile,
+        ssl_certfile=server_settings.ssl_certfile,
+    )
+if __name__ == "__main__":  # start the server only when run as a script, not on import
+    main()