Upload folder using huggingface_hub
This view is limited to 50 files because it contains too many changes; see the raw diff for the full change set.
- .gitattributes +114 -0
- .gitgnore +49 -0
- .gradio/certificate.pem +31 -0
- README.md +3 -9
- agents/__init__.py +1 -0
- agents/coder.py +23 -0
- agents/reviewer.py +27 -0
- app.py +194 -0
- assets/.gitkeep +1 -0
- assets/response.wav +0 -0
- llama.cpp/.clang-format +164 -0
- llama.cpp/.clang-tidy +27 -0
- llama.cpp/.devops/cann.Dockerfile +130 -0
- llama.cpp/.devops/cloud-v-pipeline +22 -0
- llama.cpp/.devops/cpu.Dockerfile +92 -0
- llama.cpp/.devops/cuda.Dockerfile +94 -0
- llama.cpp/.devops/intel.Dockerfile +95 -0
- llama.cpp/.devops/llama-cli-cann.Dockerfile +44 -0
- llama.cpp/.devops/llama-cpp-cuda.srpm.spec +83 -0
- llama.cpp/.devops/llama-cpp.srpm.spec +85 -0
- llama.cpp/.devops/musa.Dockerfile +101 -0
- llama.cpp/.devops/nix/apps.nix +21 -0
- llama.cpp/.devops/nix/devshells.nix +52 -0
- llama.cpp/.devops/nix/docker.nix +37 -0
- llama.cpp/.devops/nix/jetson-support.nix +39 -0
- llama.cpp/.devops/nix/nixpkgs-instances.nix +45 -0
- llama.cpp/.devops/nix/package-gguf-py.nix +36 -0
- llama.cpp/.devops/nix/package.nix +248 -0
- llama.cpp/.devops/nix/python-scripts.nix +66 -0
- llama.cpp/.devops/nix/scope.nix +41 -0
- llama.cpp/.devops/nix/sif.nix +27 -0
- llama.cpp/.devops/rocm.Dockerfile +113 -0
- llama.cpp/.devops/tools.sh +49 -0
- llama.cpp/.devops/vulkan.Dockerfile +89 -0
- llama.cpp/.dockerignore +20 -0
- llama.cpp/.ecrc +6 -0
- llama.cpp/.editorconfig +54 -0
- llama.cpp/.flake8 +18 -0
- llama.cpp/.github/ISSUE_TEMPLATE/010-bug-compilation.yml +87 -0
- llama.cpp/.github/ISSUE_TEMPLATE/011-bug-results.yml +101 -0
- llama.cpp/.github/ISSUE_TEMPLATE/019-bug-misc.yml +91 -0
- llama.cpp/.github/ISSUE_TEMPLATE/020-enhancement.yml +51 -0
- llama.cpp/.github/ISSUE_TEMPLATE/030-research.yml +52 -0
- llama.cpp/.github/ISSUE_TEMPLATE/040-refactor.yml +28 -0
- llama.cpp/.github/ISSUE_TEMPLATE/config.yml +11 -0
- llama.cpp/.github/actions/get-tag-name/action.yml +22 -0
- llama.cpp/.github/actions/windows-setup-cuda/action.yml +67 -0
- llama.cpp/.github/actions/windows-setup-curl/action.yml +30 -0
- llama.cpp/.github/labeler.yml +94 -0
- llama.cpp/.github/pull_request_template.md +1 -0
.gitattributes
CHANGED
@@ -33,3 +33,117 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+llama.cpp/.vs/llama.cpp/CopilotIndices/17.14.786.1071/CodeChunks.db filter=lfs diff=lfs merge=lfs -text
+llama.cpp/.vs/llama.cpp/CopilotIndices/17.14.786.1071/SemanticSymbols.db filter=lfs diff=lfs merge=lfs -text
+llama.cpp/.vs/llama.cpp/FileContentIndex/fa8fa901-0eee-48bf-a604-aa4561f07b11.vsidx filter=lfs diff=lfs merge=lfs -text
+llama.cpp/.vs/llama.cpp/v17/Browse.VC.db filter=lfs diff=lfs merge=lfs -text
+llama.cpp/.vs/slnx.sqlite filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/ggml-base.dll filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/ggml-base.ilk filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/ggml-base.pdb filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/ggml-cpu-alderlake.dll filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/ggml-cpu-haswell.dll filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/ggml-cpu-icelake.dll filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/ggml-cpu-sandybridge.dll filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/ggml-cpu-sapphirerapids.dll filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/ggml-cpu-skylakex.dll filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/ggml-cpu-sse42.dll filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/ggml-cpu-x64.dll filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/ggml-cpu.dll filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/ggml-cpu.ilk filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/ggml-cpu.pdb filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/ggml-rpc.dll filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/ggml.ilk filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/ggml.pdb filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/libcurl-x64.dll filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/libomp140.x86_64.dll filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-batched-bench.exe filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-bench.exe filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-cli.exe filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-completion.exe filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-fit-params.exe filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-imatrix.exe filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-mtmd-cli.exe filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-perplexity.exe filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-quantize.exe filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-run.exe filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-server.exe filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-server.ilk filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-server.pdb filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-tokenize.exe filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-tts.exe filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama.dll filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama.ilk filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama.pdb filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/mtmd.dll filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/mtmd.ilk filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/mtmd.pdb filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/rpc-server.exe filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/CMakeFiles/4.1.0-rc4/CompilerIdC/CMakeCCompilerId.exe filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/CMakeFiles/4.1.0-rc4/CompilerIdCXX/CMakeCXXCompilerId.exe filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/common/CMakeFiles/common.dir/arg.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/common/CMakeFiles/common.dir/chat-parser.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/common/CMakeFiles/common.dir/chat.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/common/CMakeFiles/common.dir/common.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/common/CMakeFiles/common.dir/common.pdb filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/common/CMakeFiles/common.dir/console.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/common/CMakeFiles/common.dir/json-partial.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/common/CMakeFiles/common.dir/json-schema-to-grammar.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/common/CMakeFiles/common.dir/log.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/common/CMakeFiles/common.dir/ngram-cache.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/common/CMakeFiles/common.dir/regex-partial.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/common/CMakeFiles/common.dir/sampling.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/common/CMakeFiles/common.dir/speculative.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/common/common.lib filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/ggml/src/CMakeFiles/ggml-base.dir/ggml-backend.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/ggml/src/CMakeFiles/ggml-base.dir/ggml-opt.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/ggml/src/CMakeFiles/ggml-base.dir/ggml-quants.c.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/ggml/src/CMakeFiles/ggml-base.dir/ggml.c.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/ggml/src/CMakeFiles/ggml-base.dir/gguf.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/ggml/src/CMakeFiles/ggml-base.dir/vc140.pdb filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/ggml/src/CMakeFiles/ggml-cpu.dir/ggml-cpu/arch/x86/quants.c.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/ggml/src/CMakeFiles/ggml-cpu.dir/ggml-cpu/arch/x86/repack.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/ggml/src/CMakeFiles/ggml-cpu.dir/ggml-cpu/binary-ops.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/ggml/src/CMakeFiles/ggml-cpu.dir/ggml-cpu/ggml-cpu.c.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/ggml/src/CMakeFiles/ggml-cpu.dir/ggml-cpu/ggml-cpu.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/ggml/src/CMakeFiles/ggml-cpu.dir/ggml-cpu/llamafile/sgemm.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/ggml/src/CMakeFiles/ggml-cpu.dir/ggml-cpu/ops.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/ggml/src/CMakeFiles/ggml-cpu.dir/ggml-cpu/quants.c.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/ggml/src/CMakeFiles/ggml-cpu.dir/ggml-cpu/repack.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/ggml/src/CMakeFiles/ggml-cpu.dir/ggml-cpu/unary-ops.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/ggml/src/CMakeFiles/ggml-cpu.dir/vc140.pdb filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/ggml/src/CMakeFiles/ggml.dir/ggml-backend-reg.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/ggml/src/CMakeFiles/ggml.dir/vc140.pdb filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/ggml/src/ggml-base.lib filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/src/CMakeFiles/llama.dir/llama-adapter.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/src/CMakeFiles/llama.dir/llama-arch.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/src/CMakeFiles/llama.dir/llama-batch.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/src/CMakeFiles/llama.dir/llama-chat.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/src/CMakeFiles/llama.dir/llama-context.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/src/CMakeFiles/llama.dir/llama-grammar.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/src/CMakeFiles/llama.dir/llama-graph.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/src/CMakeFiles/llama.dir/llama-impl.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/src/CMakeFiles/llama.dir/llama-io.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/src/CMakeFiles/llama.dir/llama-kv-cache-unified-iswa.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/src/CMakeFiles/llama.dir/llama-kv-cache-unified.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/src/CMakeFiles/llama.dir/llama-memory-hybrid.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/src/CMakeFiles/llama.dir/llama-memory-recurrent.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/src/CMakeFiles/llama.dir/llama-mmap.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/src/CMakeFiles/llama.dir/llama-model-loader.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/src/CMakeFiles/llama.dir/llama-model-saver.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/src/CMakeFiles/llama.dir/llama-model.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/src/CMakeFiles/llama.dir/llama-quant.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/src/CMakeFiles/llama.dir/llama-sampling.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/src/CMakeFiles/llama.dir/llama-vocab.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/src/CMakeFiles/llama.dir/llama.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/src/CMakeFiles/llama.dir/unicode-data.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/src/CMakeFiles/llama.dir/unicode.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/src/CMakeFiles/llama.dir/vc140.pdb filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/tools/mtmd/CMakeFiles/mtmd.dir/clip.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/tools/mtmd/CMakeFiles/mtmd.dir/mtmd-audio.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/tools/mtmd/CMakeFiles/mtmd.dir/mtmd-helper.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/tools/mtmd/CMakeFiles/mtmd.dir/mtmd.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/tools/mtmd/CMakeFiles/mtmd.dir/vc140.pdb filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/tools/server/CMakeFiles/llama-server.dir/server.cpp.obj filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/tools/server/CMakeFiles/llama-server.dir/vc140.pdb filter=lfs diff=lfs merge=lfs -text
+llama.cpp/docs/development/llama-star/idea-arch.key filter=lfs diff=lfs merge=lfs -text
.gitgnore
ADDED
@@ -0,0 +1,49 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg
+
+# Virtual Environment
+venv/
+env/
+ENV/
+.venv/
+
+# IDE & Editors
+.idea/
+*.swp
+*.swo
+.DS_Store
+.vscode/
+*.sublime-project
+*.sublime-workspace
+
+# Logs & Databases
+*.log
+*.sqlite3
+
+# Generated by app
+response.wav
+sandbox/*.py
+!sandbox/.gitkeep
+
+# Hugging Face cache
+~/.cache/huggingface/
+.gitattributes
.gradio/certificate.pem
ADDED
@@ -0,0 +1,31 @@
+-----BEGIN CERTIFICATE-----
+MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+-----END CERTIFICATE-----
README.md
CHANGED
@@ -1,12 +1,6 @@
 ---
-title: AI
-emoji: 🏃
-colorFrom: pink
-colorTo: gray
-sdk: gradio
-sdk_version: 6.2.0
+title: AI-Coding-Genius
 app_file: app.py
-
+sdk: gradio
+sdk_version: 5.42.0
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
agents/__init__.py
ADDED
@@ -0,0 +1 @@
+
agents/coder.py
ADDED
@@ -0,0 +1,23 @@
+# agents/coder.py
+from transformers import AutoTokenizer, AutoModelForCausalLM
+
+class CoderAgent:
+    def __init__(self, model_name="deepseek-ai/deepseek-coder-6.7b-instruct"):
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        self.model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            torch_dtype="auto",
+            device_map="auto"
+        )
+
+    def generate(self, prompt):
+        full_prompt = f"""
+You're a brilliant, friendly AI coder. Explain clearly and write clean Python.
+Include comments and use best practices.
+
+Task: {prompt}
+""".strip()
+        inputs = self.tokenizer(full_prompt, return_tensors="pt").to(self.model.device)
+        outputs = self.model.generate(**inputs, max_new_tokens=1024, temperature=0.4)
+        code = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+        return code[len(full_prompt):].strip()
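The CoderAgent added above downloads an instruct model through transformers, builds a single prompt, and slices the prompt prefix off the decoded output. A minimal smoke-test sketch, not part of the commit, assuming the repo layout above and enough memory for deepseek-coder-6.7b-instruct; the task string is illustrative:

# Hypothetical local smoke test for agents/coder.py (not part of the commit).
# Assumes the model weights can be downloaded and fit on the available device.
from agents.coder import CoderAgent

agent = CoderAgent()  # loads tokenizer + model on first use
snippet = agent.generate("Write a function that checks whether a string is a palindrome.")
print(snippet)        # prints the model's code/explanation for the task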
agents/reviewer.py
ADDED
@@ -0,0 +1,27 @@
+# agents/reviewer.py
+from transformers import pipeline
+
+class ReviewerAgent:
+    def __init__(self, model_name="Qwen/Qwen2-7B-Instruct"):
+        self.pipe = pipeline(
+            "text-generation",
+            model=model_name,
+            torch_dtype="auto",
+            device_map="auto"
+        )
+
+    def review(self, code):
+        prompt = f"""
+Review this Python code for:
+- Bugs
+- Performance
+- Readability
+- Best practices
+
+Code:
+{code}
+
+Provide a clear, constructive review.
+"""
+        result = self.pipe(prompt, max_new_tokens=512)
+        return result[0]['generated_text']
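ReviewerAgent wraps a text-generation pipeline and returns the full generated string, which with this pipeline call includes the original prompt text. A minimal usage sketch, not part of the commit, assuming Qwen/Qwen2-7B-Instruct fits in memory; the sample code string is illustrative:

# Hypothetical usage of agents/reviewer.py (not part of the commit).
from agents.reviewer import ReviewerAgent

reviewer = ReviewerAgent()
feedback = reviewer.review("def add(a, b):\n    return a + b")
print(feedback)  # note: the pipeline output echoes the review prompt before the critique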
app.py
ADDED
@@ -0,0 +1,194 @@
+import os
+import torch
+import gradio as gr
+from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech
+from transformers import WhisperProcessor, WhisperForConditionalGeneration
+import librosa
+import soundfile as sf
+import numpy as np
+from llama_cpp import Llama
+from huggingface_hub import hf_hub_download  # Needed to get the model
+
+# ─────────────────────────────────────────────────────────────
+# 🧠 Load Qwen Coder (The Brain) - INSIDE Python now
+# ─────────────────────────────────────────────────────────────
+print("🧠 Downloading/Loading Qwen Model...")
+try:
+    # 1. Download the model file from Hugging Face automatically
+    model_path = hf_hub_download(
+        repo_id="Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF",
+        filename="qwen2.5-coder-1.5b-instruct-q8_0.gguf"
+    )
+
+    # 2. Load it directly into memory
+    llm = Llama(
+        model_path=model_path,
+        n_ctx=4096,     # Context window
+        n_threads=2,    # Use 2 CPU threads (good for free tier)
+        verbose=False
+    )
+    print("✅ Qwen Model Loaded Successfully!")
+    llm_ready = True
+except Exception as e:
+    print(f"❌ Failed to load Qwen: {e}")
+    llm_ready = False
+
+# ─────────────────────────────────────────────────────────────
+# 🔊 Load Text-to-Speech (TTS)
+# ─────────────────────────────────────────────────────────────
+print("🔊 Loading TTS model...")
+try:
+    tts_processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
+    tts_model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
+    speaker_embeddings = torch.zeros(1, 512)  # Safe fallback
+    tts_ready = True
+    print("✅ TTS loaded!")
+except Exception as e:
+    print(f"❌ TTS failed to load: {e}")
+    tts_ready = False
+
+# ─────────────────────────────────────────────────────────────
+# 🎤 Load Speech-to-Text (STT) - Whisper Tiny
+# ─────────────────────────────────────────────────────────────
+print("🎤 Loading STT model (whisper-tiny)...")
+try:
+    stt_processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
+    stt_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
+    stt_model.eval()
+    stt_ready = True
+    print("✅ STT loaded!")
+except Exception as e:
+    print(f"❌ STT failed to load: {e}")
+    stt_ready = False
+
+# Create folders
+os.makedirs("assets", exist_ok=True)
+
+# ─────────────────────────────────────────────────────────────
+# 🎤 Convert Speech to Text
+# ─────────────────────────────────────────────────────────────
+def speech_to_text(audio):
+    if not stt_ready or audio is None:
+        return "Voice input not available."
+
+    try:
+        sample_rate, y = audio
+        if y.dtype != np.float32:
+            y = y.astype(np.float32) / 32768.0
+        if len(y.shape) > 1:
+            y = y.mean(axis=1)
+        if sample_rate != 16000:
+            y = librosa.resample(y, orig_sr=sample_rate, target_sr=16000)
+
+        inputs = stt_processor(y, sampling_rate=16000, return_tensors="pt")
+        outputs = stt_model.generate(inputs["input_features"])
+        text = stt_processor.batch_decode(outputs, skip_special_tokens=True)[0]
+        return text.strip()
+    except Exception as e:
+        return f"❌ STT Error: {str(e)}"
+
+# ─────────────────────────────────────────────────────────────
+# 💬 Generate Code (Now using Internal LLM)
+# ─────────────────────────────────────────────────────────────
+def generate_code(prompt):
+    if not prompt.strip():
+        yield "# 👋 Hello!", "Hi! I'm your AI coding partner.", None
+        return
+
+    if not llm_ready:
+        yield "# Error", "❌ Model failed to load. Check logs.", None
+        return
+
+    yield "# Thinking...", "🧠 AI is thinking...", None
+
+    try:
+        # Create the prompt in ChatML format
+        messages = [
+            {"role": "system", "content": "You are a helpful AI coder."},
+            {"role": "user", "content": prompt}
+        ]
+
+        # Ask the internal model to generate
+        output = llm.create_chat_completion(
+            messages=messages,
+            max_tokens=512,
+            temperature=0.4,
+            top_p=0.95,
+            stream=True
+        )
+
+        # Stream the response
+        raw_code = ""
+        for chunk in output:
+            if "content" in chunk["choices"][0]["delta"]:
+                text_chunk = chunk["choices"][0]["delta"]["content"]
+                raw_code += text_chunk
+                # Live update the code block
+                yield f"```python\n{raw_code}\n```", "🚀 Generating...", None
+
+        # Clean up code
+        clean_code = raw_code
+        if "```python" in clean_code:
+            clean_code = clean_code.split("```python")[1].split("```")[0].strip()
+        elif "```" in clean_code:
+            clean_code = clean_code.split("```")[1].split("```")[0].strip()
+
+        final_display = f"```python\n{clean_code}\n```"
+
+        # 🎙️ Generate voice
+        audio_path = None
+        if tts_ready:
+            try:
+                voice_text = f"Here is the code for {prompt[:20]}"
+                inputs_tts = tts_processor(text=voice_text, return_tensors="pt")
+                speech = tts_model.generate_speech(inputs_tts["input_ids"], speaker_embeddings)
+                audio_path = os.path.abspath("assets/response.wav")
+                sf.write(audio_path, speech.cpu().numpy(), samplerate=16000)
+            except Exception as e:
+                print(f"⚠️ TTS failed: {e}")
+
+        yield final_display, f"✅ Done!", audio_path
+
+    except Exception as e:
+        yield "print('Error')", f"❌ Error: {str(e)}", None
+
+# ─────────────────────────────────────────────────────────────
+# 💾 Save As Function
+# ─────────────────────────────────────────────────────────────
+def save_as_code(code, filename):
+    if not filename.strip():
+        filename = "ai_generated_code.py"
+    elif not filename.endswith(".py"):
+        filename += ".py"
+    try:
+        clean_code = code.replace("```python", "").replace("```", "").strip()
+        # In cloud, we just save to volatile memory, but this works for the demo
+        with open(filename, "w", encoding="utf-8") as f:
+            f.write(clean_code)
+        return f"💾 Saved (Temporary): {filename}"
+    except Exception as e:
+        return f"❌ Save failed: {str(e)}"
+
+# ─────────────────────────────────────────────────────────────
+# 🚀 Gradio UI
+# ─────────────────────────────────────────────────────────────
+with gr.Blocks(title="AI Coding Genius", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🤖 AI Coding Genius (Cloud Edition)")
+
+    with gr.Row():
+        stt_input = gr.Audio(label="🎤 Speak", type="numpy", format="wav")
+
+    inp = gr.Textbox(label="💬 Prompt", placeholder="Make a snake game...")
+    btn = gr.Button("🚀 Generate", variant="primary")
+
+    code_out = gr.Code(label="💻 Code", language="python", lines=15)
+    status_out = gr.Textbox(label="Status")
+    audio_out = gr.Audio(label="Voice", autoplay=True)
+
+    # Wiring
+    stt_input.change(speech_to_text, stt_input, inp)
+    btn.click(generate_code, inp, [code_out, status_out, audio_out])
+
+if __name__ == "__main__":
+    # Removed share=True for Cloud deployment
+    demo.launch()
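In app.py, generate_code is a generator that streams a partial markdown code block, a status string, and an optional audio path; Gradio consumes it through btn.click. A minimal console check of the same generator, not part of the commit; importing app runs the module-level model downloads, which assumes enough disk and RAM, and the prompt text is illustrative:

# Hypothetical console check of the streaming generator in app.py (not part of the commit).
from app import generate_code  # module import downloads the Qwen GGUF, Whisper and SpeechT5 weights

last_code, last_audio = "", None
for code_md, status, audio_path in generate_code("Write a function that sums a list of numbers"):
    last_code, last_audio = code_md, audio_path  # keep the most recent partial render
    print(status)                                # "🧠 AI is thinking...", "🚀 Generating...", "✅ Done!"
print(last_code)    # final fenced python block yielded by the app
print(last_audio)   # path to assets/response.wav if TTS succeeded, else None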
assets/.gitkeep
ADDED
@@ -0,0 +1 @@
+
assets/response.wav
ADDED
Binary file (18 kB).
llama.cpp/.clang-format
ADDED
@@ -0,0 +1,164 @@
+---
+Language: Cpp
+AlignAfterOpenBracket: Align
+AlignArrayOfStructures: Left
+AlignConsecutiveAssignments: AcrossComments
+AlignConsecutiveBitFields: AcrossComments
+AlignConsecutiveDeclarations: AcrossComments
+AlignConsecutiveMacros: AcrossComments
+# AlignConsecutiveShortCaseStatements: AcrossComments
+AlignEscapedNewlines: Left # LeftWithLastLine
+AlignOperands: Align
+AlignTrailingComments:
+  Kind: Always
+  OverEmptyLines: 1
+AllowAllArgumentsOnNextLine: true
+AllowAllParametersOfDeclarationOnNextLine: false
+# AllowBreakBeforeNoexceptSpecifier: OnlyWithParen
+AllowShortBlocksOnASingleLine: Never
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: Inline
+AllowShortIfStatementsOnASingleLine: Never
+AllowShortLambdasOnASingleLine: Inline
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakBeforeMultilineStrings: true
+BinPackArguments: false
+BinPackParameters: false # OnePerLine
+BitFieldColonSpacing: Both
+BreakBeforeBraces: Custom # Attach
+BraceWrapping:
+  AfterCaseLabel: true
+  AfterClass: false
+  AfterControlStatement: false
+  AfterEnum: false
+  AfterFunction: false
+  AfterNamespace: false
+  AfterObjCDeclaration: false
+  AfterStruct: false
+  AfterUnion: false
+  AfterExternBlock: false
+  BeforeCatch: false
+  BeforeElse: false
+  BeforeLambdaBody: false
+  BeforeWhile: false
+  IndentBraces: false
+  SplitEmptyFunction: false
+  SplitEmptyRecord: false
+  SplitEmptyNamespace: false
+# BreakAdjacentStringLiterals: true
+BreakAfterAttributes: Never
+BreakBeforeBinaryOperators: None
+BreakBeforeInlineASMColon: OnlyMultiline
+BreakBeforeTernaryOperators: false
+# BreakBinaryOperations: Never
+BreakConstructorInitializers: AfterColon
+# BreakFunctionDefinitionParameters: false
+BreakInheritanceList: AfterComma
+BreakStringLiterals: true
+# BreakTemplateDeclarations: Yes
+ColumnLimit: 120
+CommentPragmas: '^ IWYU pragma:'
+CompactNamespaces: false
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: false
+DerivePointerAlignment: false
+DisableFormat: false
+EmptyLineBeforeAccessModifier: Leave
+EmptyLineAfterAccessModifier: Never
+ExperimentalAutoDetectBinPacking: false
+FixNamespaceComments: true
+IncludeBlocks: Regroup
+IncludeCategories:
+  - Regex: '".*"'
+    Priority: 1
+    SortPriority: 0
+  - Regex: '^<.*\.h>'
+    Priority: 2
+    SortPriority: 0
+  - Regex: '^<.*'
+    Priority: 3
+    SortPriority: 0
+  - Regex: '.*'
+    Priority: 4
+    SortPriority: 0
+IncludeIsMainRegex: '([-_](test|unittest))?$'
+IncludeIsMainSourceRegex: ''
+IndentAccessModifiers: false
+IndentCaseBlocks: true
+IndentCaseLabels: true
+IndentExternBlock: NoIndent
+IndentGotoLabels: false
+IndentPPDirectives: AfterHash
+IndentWidth: 4
+IndentWrappedFunctionNames: false
+InsertBraces: true # NOTE: may lead to incorrect formatting
+InsertNewlineAtEOF: true
+JavaScriptQuotes: Leave
+JavaScriptWrapImports: true
+KeepEmptyLinesAtTheStartOfBlocks: false
+LambdaBodyIndentation: Signature
+LineEnding: LF
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBinPackProtocolList: Auto
+ObjCBlockIndentWidth: 4
+ObjCSpaceAfterProperty: true
+ObjCSpaceBeforeProtocolList: true
+PPIndentWidth: -1
+PackConstructorInitializers: CurrentLine
+PenaltyBreakAssignment: 2
+PenaltyBreakBeforeFirstCallParameter: 1
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakString: 1000
+PenaltyBreakTemplateDeclaration: 10
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 200
+PointerAlignment: Middle
+QualifierAlignment: Left
+#QualifierOrder: ['static', 'inline', 'friend', 'constexpr', 'const', 'volatile', 'type', 'restrict']
+RawStringFormats:
+  - Language: Cpp
+    Delimiters:
+      - cc
+      - CC
+      - cpp
+      - Cpp
+      - CPP
+      - 'c++'
+      - 'C++'
+    CanonicalDelimiter: ''
+ReferenceAlignment: Middle
+ReflowComments: false # IndentOnly
+SeparateDefinitionBlocks: Always
+SortIncludes: CaseInsensitive
+SortUsingDeclarations: LexicographicNumeric
+SpaceAfterCStyleCast: true
+SpaceAfterLogicalNot: false
+SpaceAfterTemplateKeyword: true
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeCpp11BracedList: false
+SpaceBeforeCtorInitializerColon: true
+SpaceBeforeInheritanceColon: true
+SpaceBeforeParens: ControlStatements
+SpaceBeforeRangeBasedForLoopColon: true
+SpaceInEmptyBlock: false
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 2
+SpacesInAngles: Never
+SpacesInContainerLiterals: true
+SpacesInLineCommentPrefix:
+  Minimum: 1
+  Maximum: -1
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+SpaceBeforeSquareBrackets: false
+Standard: c++17
+TabWidth: 4
+UseTab: Never
+WhitespaceSensitiveMacros: ['STRINGIZE']
+...
+
llama.cpp/.clang-tidy
ADDED
@@ -0,0 +1,27 @@
+---
+Checks: >
+    bugprone-*,
+    -bugprone-easily-swappable-parameters,
+    -bugprone-implicit-widening-of-multiplication-result,
+    -bugprone-misplaced-widening-cast,
+    -bugprone-narrowing-conversions,
+    readability-*,
+    -readability-avoid-unconditional-preprocessor-if,
+    -readability-function-cognitive-complexity,
+    -readability-identifier-length,
+    -readability-implicit-bool-conversion,
+    -readability-magic-numbers,
+    -readability-uppercase-literal-suffix,
+    -readability-simplify-boolean-expr,
+    -readability-math-missing-parentheses,
+    clang-analyzer-*,
+    -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
+    performance-*,
+    portability-*,
+    -portability-simd-intrinsics,
+    misc-*,
+    -misc-const-correctness,
+    -misc-non-private-member-variables-in-classes,
+    -misc-no-recursion,
+    -misc-use-anonymous-namespace,
+FormatStyle: none
llama.cpp/.devops/cann.Dockerfile
ADDED
@@ -0,0 +1,130 @@
+# ==============================================================================
+# ARGUMENTS
+# ==============================================================================
+
+# Define the CANN base image for easier version updates later
+ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.1.rc1-910b-openeuler22.03-py3.10
+
+# ==============================================================================
+# BUILD STAGE
+# Compile all binary files and libraries
+# ==============================================================================
+FROM ${CANN_BASE_IMAGE} AS build
+
+# Define the Ascend chip model for compilation. Default is Ascend910B3
+ARG ASCEND_SOC_TYPE=Ascend910B3
+
+# -- Install build dependencies --
+RUN yum install -y gcc g++ cmake make git libcurl-devel python3 python3-pip && \
+    yum clean all && \
+    rm -rf /var/cache/yum
+
+# -- Set the working directory --
+WORKDIR /app
+
+# -- Copy project files --
+COPY . .
+
+# -- Set CANN environment variables (required for compilation) --
+# Using ENV instead of `source` allows environment variables to persist across the entire image layer
+ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
+ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${LD_LIBRARY_PATH}
+ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${PATH}
+ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
+ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH
+# ... You can add other environment variables from the original file as needed ...
+# For brevity, only core variables are listed here. You can paste the original ENV list here.
+
+# -- Build llama.cpp --
+# Use the passed ASCEND_SOC_TYPE argument and add general build options
+RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh --force \
+    && \
+    cmake -B build \
+        -DGGML_CANN=ON \
+        -DCMAKE_BUILD_TYPE=Release \
+        -DSOC_TYPE=${ASCEND_SOC_TYPE} \
+        . && \
+    cmake --build build --config Release -j$(nproc)
+
+# -- Organize build artifacts for copying in later stages --
+# Create a lib directory to store all .so files
+RUN mkdir -p /app/lib && \
+    find build -name "*.so" -exec cp {} /app/lib \;
+
+# Create a full directory to store all executables and Python scripts
+RUN mkdir -p /app/full && \
+    cp build/bin/* /app/full/ && \
+    cp *.py /app/full/ && \
+    cp -r gguf-py /app/full/ && \
+    cp -r requirements /app/full/ && \
+    cp requirements.txt /app/full/
+# If you have a tools.sh script, make sure it is copied here
+# cp .devops/tools.sh /app/full/tools.sh
+
+# ==============================================================================
+# BASE STAGE
+# Create a minimal base image with CANN runtime and common libraries
+# ==============================================================================
+FROM ${CANN_BASE_IMAGE} AS base
+
+# -- Install runtime dependencies --
+RUN yum install -y libgomp curl && \
+    yum clean all && \
+    rm -rf /var/cache/yum
+
+# -- Set CANN environment variables (required for runtime) --
+ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
+ENV LD_LIBRARY_PATH=/app:${ASCEND_TOOLKIT_HOME}/lib64:${LD_LIBRARY_PATH}
+ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${PATH}
+ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
+# ... You can add other environment variables from the original file as needed ...
+
+WORKDIR /app
+
+# Copy compiled .so files from the build stage
+COPY --from=build /app/lib/ /app
+
+# ==============================================================================
+# FINAL STAGES (TARGETS)
+# ==============================================================================
+
+### Target: full
+# Complete image with all tools, Python bindings, and dependencies
+# ==============================================================================
+FROM base AS full
+
+COPY --from=build /app/full /app
+
+# Install Python dependencies
+RUN yum install -y git python3 python3-pip && \
+    pip3 install --no-cache-dir --upgrade pip setuptools wheel && \
+    pip3 install --no-cache-dir -r requirements.txt && \
+    yum clean all && \
+    rm -rf /var/cache/yum
+
+# You need to provide a tools.sh script as the entrypoint
+ENTRYPOINT ["/app/tools.sh"]
+# If there is no tools.sh, you can set the default to start the server
+# ENTRYPOINT ["/app/llama-server"]
+
+### Target: light
+# Lightweight image containing only llama-cli
+# ==============================================================================
+FROM base AS light
+
+COPY --from=build /app/full/llama-cli /app
+
+ENTRYPOINT [ "/app/llama-cli" ]
+
+### Target: server
+# Dedicated server image containing only llama-server
+# ==============================================================================
+FROM base AS server
+
+ENV LLAMA_ARG_HOST=0.0.0.0
+
+COPY --from=build /app/full/llama-server /app
+
+HEALTHCHECK --interval=5m CMD [ "curl", "-f", "http://localhost:8080/health" ]
+
+ENTRYPOINT [ "/app/llama-server" ]
llama.cpp/.devops/cloud-v-pipeline
ADDED
@@ -0,0 +1,22 @@
+node('x86_runner1'){ // Running on x86 runner containing latest vector qemu, latest vector gcc and all the necessary libraries
+    stage('Cleanup'){
+        cleanWs() // Cleaning previous CI build in workspace
+    }
+    stage('checkout repo'){
+        retry(5){ // Retry if the cloning fails due to some reason
+            checkout scm // Clone the repo on Runner
+        }
+    }
+    stage('Compiling llama.cpp'){
+        sh'''#!/bin/bash
+        make RISCV=1 RISCV_CROSS_COMPILE=1 # Compiling llama for RISC-V
+        '''
+    }
+    stage('Running llama.cpp'){
+        sh'''#!/bin/bash
+        module load gnu-bin2/0.1 # loading latest versions of vector qemu and vector gcc
+        qemu-riscv64 -L /softwares/gnu-bin2/sysroot -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 ./llama-cli -m /home/alitariq/codellama-7b.Q4_K_M.gguf -p "Anything" -n 9 > llama_log.txt # Running llama.cpp on vector qemu-riscv64
+        cat llama_log.txt # Printing results
+        '''
+    }
+}
llama.cpp/.devops/cpu.Dockerfile
ADDED
@@ -0,0 +1,92 @@
+ARG UBUNTU_VERSION=22.04
+
+FROM ubuntu:$UBUNTU_VERSION AS build
+
+ARG TARGETARCH
+
+ARG GGML_CPU_ARM_ARCH=armv8-a
+
+RUN apt-get update && \
+    apt-get install -y build-essential git cmake libcurl4-openssl-dev
+
+WORKDIR /app
+
+COPY . .
+
+RUN if [ "$TARGETARCH" = "amd64" ]; then \
+        cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
+    elif [ "$TARGETARCH" = "arm64" ]; then \
+        cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \
+    else \
+        echo "Unsupported architecture"; \
+        exit 1; \
+    fi && \
+    cmake --build build -j $(nproc)
+
+RUN mkdir -p /app/lib && \
+    find build -name "*.so" -exec cp {} /app/lib \;
+
+RUN mkdir -p /app/full \
+    && cp build/bin/* /app/full \
+    && cp *.py /app/full \
+    && cp -r gguf-py /app/full \
+    && cp -r requirements /app/full \
+    && cp requirements.txt /app/full \
+    && cp .devops/tools.sh /app/full/tools.sh
+
+## Base image
+FROM ubuntu:$UBUNTU_VERSION AS base
+
+RUN apt-get update \
+    && apt-get install -y libgomp1 curl\
+    && apt autoremove -y \
+    && apt clean -y \
+    && rm -rf /tmp/* /var/tmp/* \
+    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
+    && find /var/cache -type f -delete
+
+COPY --from=build /app/lib/ /app
+
+### Full
+FROM base AS full
+
+COPY --from=build /app/full /app
+
+WORKDIR /app
+
+RUN apt-get update \
+    && apt-get install -y \
+    git \
+    python3 \
+    python3-pip \
+    && pip install --upgrade pip setuptools wheel \
+    && pip install -r requirements.txt \
+    && apt autoremove -y \
+    && apt clean -y \
+    && rm -rf /tmp/* /var/tmp/* \
+    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
+    && find /var/cache -type f -delete
+
+ENTRYPOINT ["/app/tools.sh"]
+
+### Light, CLI only
+FROM base AS light
+
+COPY --from=build /app/full/llama-cli /app
+
+WORKDIR /app
+
+ENTRYPOINT [ "/app/llama-cli" ]
+
+### Server, Server only
+FROM base AS server
+
+ENV LLAMA_ARG_HOST=0.0.0.0
+
+COPY --from=build /app/full/llama-server /app
+
+WORKDIR /app
+
+HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
+
+ENTRYPOINT [ "/app/llama-server" ]
llama.cpp/.devops/cuda.Dockerfile
ADDED
@@ -0,0 +1,94 @@
+ARG UBUNTU_VERSION=22.04
+# This needs to generally match the container host's environment.
+ARG CUDA_VERSION=12.4.0
+# Target the CUDA build image
+ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+
+ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
+
+FROM ${BASE_CUDA_DEV_CONTAINER} AS build
+
+# CUDA architecture to build for (defaults to all supported archs)
+ARG CUDA_DOCKER_ARCH=default
+
+RUN apt-get update && \
+    apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1
+
+WORKDIR /app
+
+COPY . .
+
+RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
+        export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
+    fi && \
+    cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+    cmake --build build --config Release -j$(nproc)
+
+RUN mkdir -p /app/lib && \
+    find build -name "*.so" -exec cp {} /app/lib \;
+
+RUN mkdir -p /app/full \
+    && cp build/bin/* /app/full \
+    && cp *.py /app/full \
+    && cp -r gguf-py /app/full \
+    && cp -r requirements /app/full \
+    && cp requirements.txt /app/full \
+    && cp .devops/tools.sh /app/full/tools.sh
+
+## Base image
+FROM ${BASE_CUDA_RUN_CONTAINER} AS base
+
+RUN apt-get update \
+    && apt-get install -y libgomp1 curl\
+    && apt autoremove -y \
+    && apt clean -y \
+    && rm -rf /tmp/* /var/tmp/* \
+    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
+    && find /var/cache -type f -delete
+
+COPY --from=build /app/lib/ /app
+
+### Full
+FROM base AS full
+
+COPY --from=build /app/full /app
+
+WORKDIR /app
+
+RUN apt-get update \
+    && apt-get install -y \
+    git \
+    python3 \
+    python3-pip \
+    && pip install --upgrade pip setuptools wheel \
+    && pip install -r requirements.txt \
+    && apt autoremove -y \
+    && apt clean -y \
+    && rm -rf /tmp/* /var/tmp/* \
+    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
+    && find /var/cache -type f -delete
+
+
+ENTRYPOINT ["/app/tools.sh"]
+
+### Light, CLI only
+FROM base AS light
+
+COPY --from=build /app/full/llama-cli /app
+
+WORKDIR /app
+
+ENTRYPOINT [ "/app/llama-cli" ]
+
+### Server, Server only
+FROM base AS server
+
+ENV LLAMA_ARG_HOST=0.0.0.0
+
+COPY --from=build /app/full/llama-server /app
+
+WORKDIR /app
+
+HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
+
+ENTRYPOINT [ "/app/llama-server" ]
llama.cpp/.devops/intel.Dockerfile
ADDED
@@ -0,0 +1,95 @@
ARG ONEAPI_VERSION=2025.1.1-0-devel-ubuntu24.04

## Build Image

FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build

ARG GGML_SYCL_F16=OFF
RUN apt-get update && \
    apt-get install -y git libcurl4-openssl-dev

WORKDIR /app

COPY . .

RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
    echo "GGML_SYCL_F16 is set" \
    && export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
    fi && \
    echo "Building with dynamic libs" && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${OPT_SYCL_F16} && \
    cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh

FROM intel/oneapi-basekit:$ONEAPI_VERSION AS base

RUN apt-get update \
    && apt-get install -y libgomp1 curl \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

### Full
FROM base AS full

COPY --from=build /app/lib/ /app
COPY --from=build /app/full /app

WORKDIR /app

RUN apt-get update && \
    apt-get install -y \
    git \
    python3 \
    python3-pip \
    python3-venv && \
    python3 -m venv /opt/venv && \
    . /opt/venv/bin/activate && \
    pip install --upgrade pip setuptools wheel && \
    pip install -r requirements.txt && \
    apt autoremove -y && \
    apt clean -y && \
    rm -rf /tmp/* /var/tmp/* && \
    find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
    find /var/cache -type f -delete

ENV PATH="/opt/venv/bin:$PATH"

ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/lib/ /app
COPY --from=build /app/full/llama-cli /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/lib/ /app
COPY --from=build /app/full/llama-server /app

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
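For reference, GGML_SYCL_F16 above is a build argument, so enabling FP16 SYCL kernels happens at image build time; the tag and device pass-through below are assumptions that depend on the host's Intel GPU setup:

# Illustrative build of the CLI-only stage with FP16 SYCL kernels enabled
docker build -f .devops/intel.Dockerfile --build-arg GGML_SYCL_F16=ON --target light -t llama-cpp:light-intel .
# Intel GPUs are usually exposed to the container via /dev/dri
docker run --device /dev/dri -v /path/to/models:/models llama-cpp:light-intel -m /models/model.gguf -ngl 99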
llama.cpp/.devops/llama-cli-cann.Dockerfile
ADDED
@@ -0,0 +1,44 @@
ARG ASCEND_VERSION=8.1.RC1.alpha001-910b-openeuler22.03-py3.10

FROM ascendai/cann:$ASCEND_VERSION AS build

WORKDIR /app

COPY . .

RUN yum install -y gcc g++ cmake make libcurl-devel
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}

# Use the stub to find libascend_hal.so, because the driver has not been mounted at build time.
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH

RUN echo "Building with static libs" && \
    source /usr/local/Ascend/ascend-toolkit/set_env.sh --force && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_TESTS=OFF && \
    cmake --build build --config Release --target llama-cli

# TODO: use image with NNRT
FROM ascendai/cann:$ASCEND_VERSION AS runtime
COPY --from=build /app/build/bin/llama-cli /llama-cli

ENV LC_ALL=C.utf8

ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}

ENTRYPOINT [ "/llama-cli" ]
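A rough usage sketch; the driver pass-through shown here is an assumption and depends entirely on the host's Ascend/CANN installation and device nodes:

# Illustrative build and run of the CANN CLI image
docker build -f .devops/llama-cli-cann.Dockerfile -t llama-cpp:cli-cann .
docker run -v /path/to/models:/models \
    -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
    llama-cpp:cli-cann -m /models/model.gguf -ngl 32 -p "Hello"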
llama.cpp/.devops/llama-cpp-cuda.srpm.spec
ADDED
@@ -0,0 +1,83 @@
# SRPM for building from source and packaging an RPM for RPM-based distros.
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
# Built and maintained by John Boero - boeroboy@gmail.com
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal

# Notes for llama.cpp:
# 1. Tags are currently based on hash - which will not sort asciibetically.
#    We need to declare standard versioning if people want to sort latest releases.
# 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
#    Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
#    It is up to the user to install the correct vendor-specific support.

Name:           llama.cpp-cuda
Version:        %( date "+%%Y%%m%%d" )
Release:        1%{?dist}
Summary:        CUDA-accelerated inference of LLaMA models in C/C++
License:        MIT
Source0:        https://github.com/ggml-org/llama.cpp/archive/refs/heads/master.tar.gz
BuildRequires:  coreutils make gcc-c++ git cuda-toolkit
Requires:       cuda-toolkit
URL:            https://github.com/ggml-org/llama.cpp

%define debug_package %{nil}
%define source_date_epoch_from_changelog 0

%description
CUDA-accelerated inference for Meta's LLaMA 2 models using default options.

%prep
%setup -n llama.cpp-master

%build
make -j GGML_CUDA=1

%install
mkdir -p %{buildroot}%{_bindir}/
cp -p llama-cli %{buildroot}%{_bindir}/llama-cuda-cli
cp -p llama-server %{buildroot}%{_bindir}/llama-cuda-server
cp -p llama-simple %{buildroot}%{_bindir}/llama-cuda-simple

mkdir -p %{buildroot}/usr/lib/systemd/system
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamacuda.service
[Unit]
Description=Llama.cpp server (CUDA build).
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target

[Service]
Type=simple
EnvironmentFile=/etc/sysconfig/llama
ExecStart=/usr/bin/llama-cuda-server $LLAMA_ARGS
ExecReload=/bin/kill -s HUP $MAINPID
Restart=never

[Install]
WantedBy=default.target
EOF

mkdir -p %{buildroot}/etc/sysconfig
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
EOF

%clean
rm -rf %{buildroot}
rm -rf %{_builddir}/*

%files
%{_bindir}/llama-cuda-cli
%{_bindir}/llama-cuda-server
%{_bindir}/llama-cuda-simple
/usr/lib/systemd/system/llamacuda.service
%config /etc/sysconfig/llama

%pre

%post

%preun
%postun

%changelog
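A rough local build workflow for this spec, assuming rpmdev-setuptree has been run and NVIDIA's CUDA repository is enabled; the paths and dnf invocation are illustrative:

# Fetch the tarball referenced by Source0, then build and install the package
wget -O ~/rpmbuild/SOURCES/master.tar.gz https://github.com/ggml-org/llama.cpp/archive/refs/heads/master.tar.gz
rpmbuild -ba .devops/llama-cpp-cuda.srpm.spec
sudo dnf install ~/rpmbuild/RPMS/x86_64/llama.cpp-cuda-*.rpm
# The spec ships a llamacuda.service unit configured via /etc/sysconfig/llama
sudo systemctl enable --now llamacuda.service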
llama.cpp/.devops/llama-cpp.srpm.spec
ADDED
@@ -0,0 +1,85 @@
# SRPM for building from source and packaging an RPM for RPM-based distros.
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
# Built and maintained by John Boero - boeroboy@gmail.com
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal

# Notes for llama.cpp:
# 1. Tags are currently based on hash - which will not sort asciibetically.
#    We need to declare standard versioning if people want to sort latest releases.
#    In the meantime, YYYYMMDD format will be used.
# 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
#    Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
#    It is up to the user to install the correct vendor-specific support.

Name:           llama.cpp
Version:        %( date "+%%Y%%m%%d" )
Release:        1%{?dist}
Summary:        CPU inference of LLaMA models in pure C/C++ (no CUDA/OpenCL)
License:        MIT
Source0:        https://github.com/ggml-org/llama.cpp/archive/refs/heads/master.tar.gz
BuildRequires:  coreutils make gcc-c++ git libstdc++-devel
Requires:       libstdc++
URL:            https://github.com/ggml-org/llama.cpp

%define debug_package %{nil}
%define source_date_epoch_from_changelog 0

%description
CPU inference for Meta's LLaMA 2 models using default options.
Models are not included in this package and must be downloaded separately.

%prep
%setup -n llama.cpp-master

%build
make -j

%install
mkdir -p %{buildroot}%{_bindir}/
cp -p llama-cli %{buildroot}%{_bindir}/llama-cli
cp -p llama-server %{buildroot}%{_bindir}/llama-server
cp -p llama-simple %{buildroot}%{_bindir}/llama-simple

mkdir -p %{buildroot}/usr/lib/systemd/system
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llama.service
[Unit]
Description=Llama.cpp server, CPU only (no GPU support in this build).
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target

[Service]
Type=simple
EnvironmentFile=/etc/sysconfig/llama
ExecStart=/usr/bin/llama-server $LLAMA_ARGS
ExecReload=/bin/kill -s HUP $MAINPID
Restart=never

[Install]
WantedBy=default.target
EOF

mkdir -p %{buildroot}/etc/sysconfig
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
EOF

%clean
rm -rf %{buildroot}
rm -rf %{_builddir}/*

%files
%{_bindir}/llama-cli
%{_bindir}/llama-server
%{_bindir}/llama-simple
/usr/lib/systemd/system/llama.service
%config /etc/sysconfig/llama

%pre

%post

%preun
%postun

%changelog
llama.cpp/.devops/musa.Dockerfile
ADDED
@@ -0,0 +1,101 @@
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG MUSA_VERSION=rc4.2.0
# Target the MUSA build image
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}-amd64

ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}-amd64

FROM ${BASE_MUSA_DEV_CONTAINER} AS build

# MUSA architecture to build for (defaults to all supported archs)
ARG MUSA_DOCKER_ARCH=default

RUN apt-get update && \
    apt-get install -y \
    build-essential \
    cmake \
    python3 \
    python3-pip \
    git \
    libcurl4-openssl-dev \
    libgomp1

WORKDIR /app

COPY . .

RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
    export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
    fi && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
    cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh

## Base image
FROM ${BASE_MUSA_RUN_CONTAINER} AS base

RUN apt-get update \
    && apt-get install -y libgomp1 curl \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

COPY --from=build /app/lib/ /app

### Full
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

RUN apt-get update \
    && apt-get install -y \
    git \
    python3 \
    python3-pip \
    && pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/full/llama-cli /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
llama.cpp/.devops/nix/apps.nix
ADDED
@@ -0,0 +1,21 @@
{
  perSystem =
    { config, lib, ... }:
    {
      apps =
        let
          inherit (config.packages) default;
          binaries = [
            "llama-cli"
            "llama-embedding"
            "llama-server"
            "llama-quantize"
          ];
          mkApp = name: {
            type = "app";
            program = "${default}/bin/${name}";
          };
        in
        lib.genAttrs binaries mkApp;
    };
}
llama.cpp/.devops/nix/devshells.nix
ADDED
@@ -0,0 +1,52 @@
{ inputs, ... }:

{
  perSystem =
    {
      config,
      lib,
      system,
      ...
    }:
    {
      devShells =
        let
          pkgs = import inputs.nixpkgs { inherit system; };
          stdenv = pkgs.stdenv;
          scripts = config.packages.python-scripts;
        in
        lib.pipe (config.packages) [
          (lib.concatMapAttrs (
            name: package: {
              ${name} = pkgs.mkShell {
                name = "${name}";
                inputsFrom = [ package ];
                shellHook = ''
                  echo "Entering ${name} devShell"
                '';
              };
              "${name}-extra" =
                if (name == "python-scripts") then
                  null
                else
                  pkgs.mkShell {
                    name = "${name}-extra";
                    inputsFrom = [
                      package
                      scripts
                    ];
                    # Extra packages that *may* be used by some scripts
                    packages = [
                      pkgs.python3Packages.tiktoken
                    ];
                    shellHook = ''
                      echo "Entering ${name} devShell"
                      addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib stdenv.cc.cc}/lib"
                    '';
                  };
            }
          ))
          (lib.filterAttrs (name: value: value != null))
        ];
    };
}
llama.cpp/.devops/nix/docker.nix
ADDED
@@ -0,0 +1,37 @@
{
  lib,
  dockerTools,
  buildEnv,
  llama-cpp,
  interactive ? true,
  coreutils,
}:

# A tar that can be fed into `docker load`:
#
# $ nix build .#llamaPackages.docker
# $ docker load < result

# For details and variations cf.
# - https://nixos.org/manual/nixpkgs/unstable/#ssec-pkgs-dockerTools-buildLayeredImage
# - https://discourse.nixos.org/t/a-faster-dockertools-buildimage-prototype/16922
# - https://nixery.dev/

# Approximate (compressed) sizes, at the time of writing, are:
#
# .#llamaPackages.docker: 125M;
# .#llamaPackagesCuda.docker: 537M;
# .#legacyPackages.aarch64-linux.llamaPackagesXavier.docker: 415M.

dockerTools.buildLayeredImage {
  name = llama-cpp.pname;
  tag = "latest";

  contents =
    [ llama-cpp ]
    ++ lib.optionals interactive [
      coreutils
      dockerTools.binSh
      dockerTools.caCertificates
    ];
}
llama.cpp/.devops/nix/jetson-support.nix
ADDED
@@ -0,0 +1,39 @@
{ inputs, ... }:
{
  perSystem =
    {
      config,
      system,
      lib,
      pkgsCuda,
      ...
    }:
    {
      legacyPackages =
        let
          caps.llamaPackagesXavier = "7.2";
          caps.llamaPackagesOrin = "8.7";
          caps.llamaPackagesTX2 = "6.2";
          caps.llamaPackagesNano = "5.3";

          pkgsFor =
            cap:
            import inputs.nixpkgs {
              inherit system;
              config = {
                cudaSupport = true;
                cudaCapabilities = [ cap ];
                cudaEnableForwardCompat = false;
                inherit (pkgsCuda.config) allowUnfreePredicate;
              };
            };
        in
        builtins.mapAttrs (name: cap: (pkgsFor cap).callPackage ./scope.nix { }) caps;

      packages = lib.optionalAttrs (system == "aarch64-linux") {
        jetson-xavier = config.legacyPackages.llamaPackagesXavier.llama-cpp;
        jetson-orin = config.legacyPackages.llamaPackagesOrin.llama-cpp;
        jetson-nano = config.legacyPackages.llamaPackagesNano.llama-cpp;
      };
    };
}
|
|
@@ -0,0 +1,45 @@
{ inputs, ... }:
{
  # The _module.args definitions are passed on to modules as arguments. E.g.
  # the module `{ pkgs ... }: { /* config */ }` implicitly uses
  # `_module.args.pkgs` (defined in this case by flake-parts).
  perSystem =
    { system, ... }:
    {
      _module.args = {
        # Note: bringing up https://zimbatm.com/notes/1000-instances-of-nixpkgs
        # again, the below creates several nixpkgs instances which the
        # flake-centric CLI will be forced to evaluate e.g. on `nix flake show`.
        #
        # This is currently "slow" and "expensive", on a certain scale.
        # This also isn't "right" in that this hinders dependency injection at
        # the level of flake inputs. This might get removed in the foreseeable
        # future.
        #
        # Note that you can use these expressions without Nix
        # (`pkgs.callPackage ./devops/nix/scope.nix { }` is the entry point).

        pkgsCuda = import inputs.nixpkgs {
          inherit system;
          # Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc,
          # and ucx are built with CUDA support)
          config.cudaSupport = true;
          config.allowUnfreePredicate =
            p:
            builtins.all (
              license:
              license.free
              || builtins.elem license.shortName [
                "CUDA EULA"
                "cuDNN EULA"
              ]
            ) (p.meta.licenses or [ p.meta.license ]);
        };
        # Ensure dependencies use ROCm consistently
        pkgsRocm = import inputs.nixpkgs {
          inherit system;
          config.rocmSupport = true;
        };
      };
    };
}
llama.cpp/.devops/nix/package-gguf-py.nix
ADDED
@@ -0,0 +1,36 @@
{
  lib,
  llamaVersion,
  numpy,
  tqdm,
  sentencepiece,
  pyyaml,
  poetry-core,
  buildPythonPackage,
  pytestCheckHook,
}:

buildPythonPackage {
  pname = "gguf";
  version = llamaVersion;
  pyproject = true;
  nativeBuildInputs = [ poetry-core ];
  propagatedBuildInputs = [
    numpy
    tqdm
    sentencepiece
    pyyaml
  ];
  src = lib.cleanSource ../../gguf-py;
  pythonImportsCheck = [
    "numpy"
    "gguf"
  ];
  nativeCheckInputs = [ pytestCheckHook ];
  doCheck = true;
  meta = with lib; {
    description = "Python package for writing binary files in the GGUF format";
    license = licenses.mit;
    maintainers = [ maintainers.ditsuke ];
  };
}
llama.cpp/.devops/nix/package.nix
ADDED
@@ -0,0 +1,248 @@
{
  lib,
  glibc,
  config,
  stdenv,
  runCommand,
  cmake,
  ninja,
  pkg-config,
  git,
  mpi,
  blas,
  cudaPackages,
  autoAddDriverRunpath,
  darwin,
  rocmPackages,
  vulkan-headers,
  vulkan-loader,
  curl,
  shaderc,
  useBlas ?
    builtins.all (x: !x) [
      useCuda
      useMetalKit
      useRocm
      useVulkan
    ]
    && blas.meta.available,
  useCuda ? config.cudaSupport,
  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
  # Increases the runtime closure size by ~700M
  useMpi ? false,
  useRocm ? config.rocmSupport,
  rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets,
  enableCurl ? true,
  useVulkan ? false,
  llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake

  # It's necessary to consistently use backendStdenv when building with CUDA support,
  # otherwise we get libstdc++ errors downstream.
  effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
  enableStatic ? effectiveStdenv.hostPlatform.isStatic,
  precompileMetalShaders ? false,
}:

let
  inherit (lib)
    cmakeBool
    cmakeFeature
    optionalAttrs
    optionals
    strings
    ;

  stdenv = throw "Use effectiveStdenv instead";

  suffices =
    lib.optionals useBlas [ "BLAS" ]
    ++ lib.optionals useCuda [ "CUDA" ]
    ++ lib.optionals useMetalKit [ "MetalKit" ]
    ++ lib.optionals useMpi [ "MPI" ]
    ++ lib.optionals useRocm [ "ROCm" ]
    ++ lib.optionals useVulkan [ "Vulkan" ];

  pnameSuffix =
    strings.optionalString (suffices != [ ])
      "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
  descriptionSuffix = strings.optionalString (
    suffices != [ ]
  ) ", accelerated with ${strings.concatStringsSep ", " suffices}";

  xcrunHost = runCommand "xcrunHost" { } ''
    mkdir -p $out/bin
    ln -s /usr/bin/xcrun $out/bin
  '';

  # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
  # separately
  darwinBuildInputs =
    with darwin.apple_sdk.frameworks;
    [
      Accelerate
      CoreVideo
      CoreGraphics
    ]
    ++ optionals useMetalKit [ MetalKit ];

  cudaBuildInputs = with cudaPackages; [
    cuda_cudart
    cuda_cccl # <nv/target>
    libcublas
  ];

  rocmBuildInputs = with rocmPackages; [
    clr
    hipblas
    rocblas
  ];

  vulkanBuildInputs = [
    vulkan-headers
    vulkan-loader
    shaderc
  ];
in

effectiveStdenv.mkDerivation (finalAttrs: {
  pname = "llama-cpp${pnameSuffix}";
  version = llamaVersion;

  # Note: none of the files discarded here are visible in the sandbox or
  # affect the output hash. This also means they can be modified without
  # triggering a rebuild.
  src = lib.cleanSourceWith {
    filter =
      name: type:
      let
        noneOf = builtins.all (x: !x);
        baseName = baseNameOf name;
      in
      noneOf [
        (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
        (lib.hasSuffix ".md" name) # Ignore *.md changes when computing outPaths
        (lib.hasPrefix "." baseName) # Skip hidden files and directories
        (baseName == "flake.lock")
      ];
    src = lib.cleanSource ../../.;
  };

  postPatch = ''
    substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
      --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
    substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
      --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
  '';

  # With PR#6015 https://github.com/ggml-org/llama.cpp/pull/6015,
  # `default.metallib` may be compiled with the Metal compiler from Xcode,
  # and we need to escape the sandbox on macOS to access the Metal compiler.
  # `xcrun` is used to find the path of the Metal compiler, which is variable
  # and not on $PATH;
  # see https://github.com/ggml-org/llama.cpp/pull/6118 for discussion.
  __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;

  nativeBuildInputs =
    [
      cmake
      ninja
      pkg-config
      git
    ]
    ++ optionals useCuda [
      cudaPackages.cuda_nvcc

      autoAddDriverRunpath
    ]
    ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ]
    ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ];

  buildInputs =
    optionals effectiveStdenv.isDarwin darwinBuildInputs
    ++ optionals useCuda cudaBuildInputs
    ++ optionals useMpi [ mpi ]
    ++ optionals useRocm rocmBuildInputs
    ++ optionals useBlas [ blas ]
    ++ optionals useVulkan vulkanBuildInputs
    ++ optionals enableCurl [ curl ];

  cmakeFlags =
    [
      (cmakeBool "LLAMA_BUILD_SERVER" true)
      (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
      (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
      (cmakeBool "LLAMA_CURL" enableCurl)
      (cmakeBool "GGML_NATIVE" false)
      (cmakeBool "GGML_BLAS" useBlas)
      (cmakeBool "GGML_CUDA" useCuda)
      (cmakeBool "GGML_HIP" useRocm)
      (cmakeBool "GGML_METAL" useMetalKit)
      (cmakeBool "GGML_VULKAN" useVulkan)
      (cmakeBool "GGML_STATIC" enableStatic)
    ]
    ++ optionals useCuda [
      (
        with cudaPackages.flags;
        cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
          builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
        )
      )
    ]
    ++ optionals useRocm [
      (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
      (cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmGpuTargets)
    ]
    ++ optionals useMetalKit [
      (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
      (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
    ];

  # Environment variables needed for ROCm
  env = optionalAttrs useRocm {
    ROCM_PATH = "${rocmPackages.clr}";
    HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
  };

  # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
  # if they haven't been added yet.
  postInstall = ''
    mkdir -p $out/include
    cp $src/include/llama.h $out/include/
  '';

  meta = {
    # Configurations we don't want even the CI to evaluate. Results in the
    # "unsupported platform" messages. This is mostly a no-op, because
    # cudaPackages would've refused to evaluate anyway.
    badPlatforms = optionals useCuda lib.platforms.darwin;

    # Configurations that are known to result in build failures. Can be
    # overridden by importing Nixpkgs with `allowBroken = true`.
    broken = (useMetalKit && !effectiveStdenv.isDarwin);

    description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
    homepage = "https://github.com/ggml-org/llama.cpp/";
    license = lib.licenses.mit;

    # Accommodates `nix run` and `lib.getExe`
    mainProgram = "llama-cli";

    # These people might respond, on the best effort basis, if you ping them
    # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
    # Consider adding yourself to this list if you want to ensure this flake
    # stays maintained and you're willing to invest your time. Do not add
    # other people without their consent. Consider removing people after
    # they've been unreachable for long periods of time.

    # Note that lib.maintainers is defined in Nixpkgs, but you may just add
    # an attrset following the same format as in
    # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
    maintainers = with lib.maintainers; [
      philiptaron
      SomeoneSerge
    ];

    # Extend `badPlatforms` instead
    platforms = lib.platforms.all;
  };
})
llama.cpp/.devops/nix/python-scripts.nix
ADDED
@@ -0,0 +1,66 @@
{
  lib,
  stdenv,
  buildPythonPackage,
  poetry-core,
  mkShell,
  python3Packages,
  gguf-py,
}@inputs:

let
  llama-python-deps = with python3Packages; [
    numpy
    sentencepiece
    transformers
    protobuf
    torchWithoutCuda
    gguf-py
    tqdm

    # for scripts/compare-llama-bench.py
    gitpython
    tabulate

    # for examples/pydantic-models-to-grammar-examples.py
    docstring-parser
    pydantic
  ];

  llama-python-test-deps = with python3Packages; [
    # Server bench
    matplotlib

    # server tests
    openai
    pytest
    prometheus-client
  ];
in

buildPythonPackage ({
  pname = "llama-scripts";
  version = "0.0.0";
  pyproject = true;

  # NOTE: The files filtered out here are not visible in the build sandbox, neither
  # do they affect the output hash. They can be modified without triggering a rebuild.
  src = lib.cleanSourceWith {
    filter =
      name: type:
      let
        any = builtins.any (x: x);
        baseName = builtins.baseNameOf name;
      in
      any [
        (lib.hasSuffix ".py" name)
        (baseName == "README.md")
        (baseName == "pyproject.toml")
      ];
    src = lib.cleanSource ../../.;
  };
  nativeBuildInputs = [ poetry-core ];
  nativeCheckInputs = llama-python-test-deps;
  dependencies = llama-python-deps;
})
llama.cpp/.devops/nix/scope.nix
ADDED
@@ -0,0 +1,41 @@
{
  lib,
  newScope,
  python3,
  llamaVersion ? "0.0.0",
}:

let
  pythonPackages = python3.pkgs;
  buildPythonPackage = pythonPackages.buildPythonPackage;
  numpy = pythonPackages.numpy;
  tqdm = pythonPackages.tqdm;
  sentencepiece = pythonPackages.sentencepiece;
  pyyaml = pythonPackages.pyyaml;
  poetry-core = pythonPackages.poetry-core;
  pytestCheckHook = pythonPackages.pytestCheckHook;
in

# We're using `makeScope` instead of just writing out an attrset
# because it allows users to apply overlays later using `overrideScope'`.
# Cf. https://noogle.dev/f/lib/makeScope

lib.makeScope newScope (self: {
  inherit llamaVersion;
  gguf-py = self.callPackage ./package-gguf-py.nix {
    inherit
      buildPythonPackage
      numpy
      tqdm
      sentencepiece
      poetry-core
      pyyaml
      pytestCheckHook
      ;
  };
  python-scripts = self.callPackage ./python-scripts.nix { inherit buildPythonPackage poetry-core; };
  llama-cpp = self.callPackage ./package.nix { };
  docker = self.callPackage ./docker.nix { };
  docker-min = self.callPackage ./docker.nix { interactive = false; };
  sif = self.callPackage ./sif.nix { };
})
llama.cpp/.devops/nix/sif.nix
ADDED
@@ -0,0 +1,27 @@
{
  lib,
  singularity-tools,
  llama-cpp,
  bashInteractive,
  interactive ? false,
}:

let
  optionalInt = cond: x: if cond then x else 0;
in
singularity-tools.buildImage rec {
  inherit (llama-cpp) name;
  contents = [ llama-cpp ] ++ lib.optionals interactive [ bashInteractive ];

  # These are excessive (but safe) for most variants. Building singularity
  # images requires superuser privileges, so we build them inside a VM in a
  # writable image of pre-determined size.
  #
  # ROCm is currently affected by https://github.com/NixOS/nixpkgs/issues/276846
  #
  # Expected image sizes:
  # - cpu/blas: 150M,
  # - cuda, all gencodes: 560M,
  diskSize = 4096 + optionalInt llama-cpp.useRocm 16384;
  memSize = diskSize;
}
llama.cpp/.devops/rocm.Dockerfile
ADDED
@@ -0,0 +1,113 @@
ARG UBUNTU_VERSION=24.04

# This needs to generally match the container host's environment.
ARG ROCM_VERSION=6.4
ARG AMDGPU_VERSION=6.4

# Target the ROCm build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

### Build image
FROM ${BASE_ROCM_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggml-org/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
# gfx803, gfx900, gfx1032, gfx1101, gfx1102 are not officially supported;
# gfx906 is deprecated.
# Check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html

ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102'
#ARG ROCM_DOCKER_ARCH=gfx1100

# Set the GPU architectures to build for
ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
# ENV CC=/opt/rocm/llvm/bin/clang
# ENV CXX=/opt/rocm/llvm/bin/clang++

RUN apt-get update \
    && apt-get install -y \
    build-essential \
    cmake \
    git \
    libcurl4-openssl-dev \
    curl \
    libgomp1

WORKDIR /app

COPY . .

RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
    cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DCMAKE_BUILD_TYPE=Release -DLLAMA_BUILD_TESTS=OFF \
    && cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib \
    && find build -name "*.so" -exec cp {} /app/lib \;

RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh

## Base image
FROM ${BASE_ROCM_DEV_CONTAINER} AS base

RUN apt-get update \
    && apt-get install -y libgomp1 curl \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

COPY --from=build /app/lib/ /app

### Full
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

RUN apt-get update \
    && apt-get install -y \
    git \
    python3-pip \
    python3 \
    python3-wheel \
    && pip install --break-system-packages --upgrade setuptools \
    && pip install --break-system-packages -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/full/llama-cli /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
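A minimal run sketch; /dev/kfd, /dev/dri and the video group are the usual ROCm pass-through on Linux hosts, while the tag and model path are placeholders:

docker build -f .devops/rocm.Dockerfile --target server -t llama-cpp:server-rocm .
docker run --device /dev/kfd --device /dev/dri --group-add video \
    -v /path/to/models:/models -p 8080:8080 \
    llama-cpp:server-rocm -m /models/model.gguf -ngl 99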
llama.cpp/.devops/tools.sh
ADDED
@@ -0,0 +1,49 @@
#!/usr/bin/env bash
set -e

# Read the first argument into a variable
arg1="$1"

# Shift the arguments to remove the first one
shift

if [[ "$arg1" == '--convert' || "$arg1" == '-c' ]]; then
    exec python3 ./convert_hf_to_gguf.py "$@"
elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
    exec ./llama-quantize "$@"
elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
    exec ./llama-cli "$@"
elif [[ "$arg1" == '--bench' || "$arg1" == '-b' ]]; then
    exec ./llama-bench "$@"
elif [[ "$arg1" == '--perplexity' || "$arg1" == '-p' ]]; then
    exec ./llama-perplexity "$@"
elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
    echo "Converting PTH to GGML..."
    for i in $(ls $1/$2/ggml-model-f16.bin*); do
        if [ -f "${i/f16/q4_0}" ]; then
            echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
        else
            echo "Converting PTH to GGML: $i into ${i/f16/q4_0}..."
            exec ./llama-quantize "$i" "${i/f16/q4_0}" q4_0
        fi
    done
elif [[ "$arg1" == '--server' || "$arg1" == '-s' ]]; then
    exec ./llama-server "$@"
else
    echo "Unknown command: $arg1"
    echo "Available commands: "
    echo "  --run (-r): Run a model previously converted into ggml"
    echo "              ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -n 512"
    echo "  --bench (-b): Benchmark the performance of the inference for various parameters."
    echo "              ex: -m model.gguf"
    echo "  --perplexity (-p): Measure the perplexity of a model over a given text."
    echo "              ex: -m model.gguf -f file.txt"
    echo "  --convert (-c): Convert a llama model into ggml"
    echo "              ex: --outtype f16 \"/models/7B/\" "
    echo "  --quantize (-q): Optimize with quantization process ggml"
    echo "              ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2"
    echo "  --all-in-one (-a): Execute --convert & --quantize"
    echo "              ex: \"/models/\" 7B"
    echo "  --server (-s): Run a model on the server"
    echo "              ex: -m /models/7B/ggml-model-q4_0.bin -c 2048 -ngl 43 -mg 1 --port 8080"
fi
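Because tools.sh is the ENTRYPOINT of the "full" images, its flags are passed straight through docker run; the image tag below is a placeholder for whichever full variant was built:

docker run -v /path/to/models:/models llama-cpp:full --convert --outtype f16 /models/7B/
docker run -v /path/to/models:/models llama-cpp:full --quantize /models/7B/ggml-model-f16.gguf /models/7B/ggml-model-q4_0.gguf q4_0
docker run -v /path/to/models:/models llama-cpp:full --run -m /models/7B/ggml-model-q4_0.gguf -p "Hello" -n 64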
llama.cpp/.devops/vulkan.Dockerfile
ADDED
@@ -0,0 +1,89 @@
ARG UBUNTU_VERSION=24.04

FROM ubuntu:$UBUNTU_VERSION AS build

# Install build tools
RUN apt update && apt install -y git build-essential cmake wget

# Install Vulkan SDK and cURL
RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
    wget -qO /etc/apt/sources.list.d/lunarg-vulkan-noble.list https://packages.lunarg.com/vulkan/lunarg-vulkan-noble.list && \
    apt update -y && \
    apt-get install -y vulkan-sdk libcurl4-openssl-dev curl

# Build it
WORKDIR /app

COPY . .

RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON && \
    cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh

## Base image
FROM ubuntu:$UBUNTU_VERSION AS base

RUN apt-get update \
    && apt-get install -y libgomp1 curl libvulkan-dev \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

COPY --from=build /app/lib/ /app

### Full
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

RUN apt-get update \
    && apt-get install -y \
    git \
    python3 \
    python3-pip \
    python3-wheel \
    && pip install --break-system-packages --upgrade setuptools \
    && pip install --break-system-packages -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/full/llama-cli /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
llama.cpp/.dockerignore
ADDED
@@ -0,0 +1,20 @@
*.o
*.a
.cache/
# Do not ignore .git directory, otherwise the reported build number will always be 0
.github/
.gitignore
.vs/
.vscode/
.DS_Store

build*/

models/*

/llama-cli
/llama-quantize

arm_neon.h
compile_commands.json
Dockerfile
llama.cpp/.ecrc
ADDED
@@ -0,0 +1,6 @@
{
    "Exclude": ["^\\.gitmodules$", "stb_image\\.h"],
    "Disable": {
        "IndentSize": true
    }
}
llama.cpp/.editorconfig
ADDED
@@ -0,0 +1,54 @@
# https://EditorConfig.org

# Top-most EditorConfig file
root = true

# Unix-style newlines with a newline ending every file, utf-8 charset
[*]
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
charset = utf-8
indent_style = space
indent_size = 4

[Makefile]
indent_style = tab

[scripts/*.mk]
indent_style = tab

[prompts/*.txt]
insert_final_newline = unset

[tools/server/public/*]
indent_size = 2

[tools/server/public/deps_*]
trim_trailing_whitespace = unset
indent_style = unset
indent_size = unset

[tools/server/deps_*]
trim_trailing_whitespace = unset
indent_style = unset
indent_size = unset

[examples/llama.swiftui/llama.swiftui.xcodeproj/*]
indent_style = tab

[tools/cvector-generator/*.txt]
trim_trailing_whitespace = unset
insert_final_newline = unset

[models/templates/*.jinja]
indent_style = unset
indent_size = unset
end_of_line = unset
charset = unset
trim_trailing_whitespace = unset
insert_final_newline = unset

[vendor/miniaudio/miniaudio.h]
trim_trailing_whitespace = unset
insert_final_newline = unset
llama.cpp/.flake8
ADDED
@@ -0,0 +1,18 @@
[flake8]
max-line-length = 125
ignore = E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503
exclude =
    # Do not traverse examples and tools
    examples,
    tools,
    # Do not include package initializers
    __init__.py,
    # No need to traverse our git directory
    .git,
    # There's no value in checking cache directories
    __pycache__,
    # No need to include the build path
    build,
    # This contains builds that we don't want to check
    dist  # This is generated with `python build .` for package releases
# max-complexity = 10
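
The [flake8] section above is discovered automatically when flake8 runs from the repository root, so Python linting needs no extra flags. A small sketch, assuming flake8 is available in the active Python environment:

    # lint the Python sources with the settings above; examples/, tools/, build/ and friends are excluded
    python3 -m pip install flake8
    python3 -m flake8 .
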
llama.cpp/.github/ISSUE_TEMPLATE/010-bug-compilation.yml
ADDED
@@ -0,0 +1,87 @@
name: Bug (compilation)
description: Something goes wrong when trying to compile llama.cpp.
title: "Compile bug: "
labels: ["bug-unconfirmed", "compilation"]
body:
  - type: markdown
    attributes:
      value: >
        Thanks for taking the time to fill out this bug report!
        This issue template is intended for bug reports where the compilation of llama.cpp fails.
        Before opening an issue, please confirm that the compilation still fails with `-DGGML_CCACHE=OFF`.
        If the compilation succeeds with ccache disabled you should be able to permanently fix the issue
        by clearing `~/.cache/ccache` (on Linux).
  - type: textarea
    id: commit
    attributes:
      label: Git commit
      description: Which commit are you trying to compile?
      placeholder: |
        $git rev-parse HEAD
        84a07a17b1b08cf2b9747c633a2372782848a27f
    validations:
      required: true
  - type: dropdown
    id: operating-system
    attributes:
      label: Operating systems
      description: Which operating systems do you know to be affected?
      multiple: true
      options:
        - Linux
        - Mac
        - Windows
        - BSD
        - Other? (Please let us know in description)
    validations:
      required: true
  - type: dropdown
    id: backends
    attributes:
      label: GGML backends
      description: Which GGML backends do you know to be affected?
      options: [AMX, BLAS, CPU, CUDA, HIP, Metal, Musa, RPC, SYCL, Vulkan, OpenCL]
      multiple: true
    validations:
      required: true
  - type: textarea
    id: info
    attributes:
      label: Problem description & steps to reproduce
      description: >
        Please give us a summary of the problem and tell us how to reproduce it.
        If you can narrow down the bug to specific compile flags, that information would be very much appreciated by us.
      placeholder: >
        I'm trying to compile llama.cpp with CUDA support on a fresh install of Ubuntu and get error XY.
        Here are the exact commands that I used: ...
    validations:
      required: true
  - type: textarea
    id: first_bad_commit
    attributes:
      label: First Bad Commit
      description: >
        If the bug was not present on an earlier version: when did it start appearing?
        If possible, please do a git bisect and identify the exact commit that introduced the bug.
    validations:
      required: false
  - type: textarea
    id: command
    attributes:
      label: Compile command
      description: >
        Please provide the exact command you used to compile llama.cpp. For example: `cmake -B ...`.
        This will be automatically formatted into code, so no need for backticks.
      render: shell
    validations:
      required: true
  - type: textarea
    id: logs
    attributes:
      label: Relevant log output
      description: >
        Please copy and paste any relevant log output, including any generated text.
        This will be automatically formatted into code, so no need for backticks.
      render: shell
    validations:
      required: true
llama.cpp/.github/ISSUE_TEMPLATE/011-bug-results.yml
ADDED
@@ -0,0 +1,101 @@
name: Bug (model use)
description: Something goes wrong when using a model (in general, not specific to a single llama.cpp module).
title: "Eval bug: "
labels: ["bug-unconfirmed", "model evaluation"]
body:
  - type: markdown
    attributes:
      value: >
        Thanks for taking the time to fill out this bug report!
        This issue template is intended for bug reports where the model evaluation results
        (i.e. the generated text) are incorrect or llama.cpp crashes during model evaluation.
        If you encountered the issue while using an external UI (e.g. ollama),
        please reproduce your issue using one of the examples/binaries in this repository.
        The `llama-cli` binary can be used for simple and reproducible model inference.
  - type: textarea
    id: version
    attributes:
      label: Name and Version
      description: Which version of our software are you running? (use `--version` to get a version string)
      placeholder: |
        $./llama-cli --version
        version: 2999 (42b4109e)
        built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
    validations:
      required: true
  - type: dropdown
    id: operating-system
    attributes:
      label: Operating systems
      description: Which operating systems do you know to be affected?
      multiple: true
      options:
        - Linux
        - Mac
        - Windows
        - BSD
        - Other? (Please let us know in description)
    validations:
      required: true
  - type: dropdown
    id: backends
    attributes:
      label: GGML backends
      description: Which GGML backends do you know to be affected?
      options: [AMX, BLAS, CPU, CUDA, HIP, Metal, Musa, RPC, SYCL, Vulkan, OpenCL]
      multiple: true
    validations:
      required: true
  - type: textarea
    id: hardware
    attributes:
      label: Hardware
      description: Which CPUs/GPUs are you using?
      placeholder: >
        e.g. Ryzen 5950X + 2x RTX 4090
    validations:
      required: true
  - type: textarea
    id: model
    attributes:
      label: Models
      description: >
        Which model(s) at which quantization were you using when encountering the bug?
        If you downloaded a GGUF file off of Huggingface, please provide a link.
      placeholder: >
        e.g. Meta LLaMA 3.1 Instruct 8b q4_K_M
    validations:
      required: false
  - type: textarea
    id: info
    attributes:
      label: Problem description & steps to reproduce
      description: >
        Please give us a summary of the problem and tell us how to reproduce it.
        If you can narrow down the bug to specific hardware, compile flags, or command line arguments,
        that information would be very much appreciated by us.
      placeholder: >
        e.g. when I run llama-cli with -ngl 99 I get garbled outputs.
        When I use -ngl 0 it works correctly.
        Here are the exact commands that I used: ...
    validations:
      required: true
  - type: textarea
    id: first_bad_commit
    attributes:
      label: First Bad Commit
      description: >
        If the bug was not present on an earlier version: when did it start appearing?
        If possible, please do a git bisect and identify the exact commit that introduced the bug.
    validations:
      required: false
  - type: textarea
    id: logs
    attributes:
      label: Relevant log output
      description: >
        Please copy and paste any relevant log output, including the command that you entered and any generated text.
        This will be automatically formatted into code, so no need for backticks.
      render: shell
    validations:
      required: true
llama.cpp/.github/ISSUE_TEMPLATE/019-bug-misc.yml
ADDED
@@ -0,0 +1,91 @@
name: Bug (misc.)
description: Something is not working the way it should (and it's not covered by any of the above cases).
title: "Misc. bug: "
labels: ["bug-unconfirmed"]
body:
  - type: markdown
    attributes:
      value: >
        Thanks for taking the time to fill out this bug report!
        This issue template is intended for miscellaneous bugs that don't fit into any other category.
        If you encountered the issue while using an external UI (e.g. ollama),
        please reproduce your issue using one of the examples/binaries in this repository.
  - type: textarea
    id: version
    attributes:
      label: Name and Version
      description: Which version of our software is affected? (You can use `--version` to get a version string.)
      placeholder: |
        $./llama-cli --version
        version: 2999 (42b4109e)
        built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
    validations:
      required: true
  - type: dropdown
    id: operating-system
    attributes:
      label: Operating systems
      description: Which operating systems do you know to be affected?
      multiple: true
      options:
        - Linux
        - Mac
        - Windows
        - BSD
        - Other? (Please let us know in description)
    validations:
      required: false
  - type: dropdown
    id: module
    attributes:
      label: Which llama.cpp modules do you know to be affected?
      multiple: true
      options:
        - Documentation/Github
        - libllama (core library)
        - llama-cli
        - llama-server
        - llama-bench
        - llama-quantize
        - Python/Bash scripts
        - Test code
        - Other (Please specify in the next section)
    validations:
      required: false
  - type: textarea
    id: command
    attributes:
      label: Command line
      description: >
        Please provide the exact commands you entered, if applicable. For example: `llama-server -m ... -c ...`, `llama-cli -m ...`, etc.
        This will be automatically formatted into code, so no need for backticks.
      render: shell
    validations:
      required: false
  - type: textarea
    id: info
    attributes:
      label: Problem description & steps to reproduce
      description: >
        Please give us a summary of the problem and tell us how to reproduce it (if applicable).
    validations:
      required: true
  - type: textarea
    id: first_bad_commit
    attributes:
      label: First Bad Commit
      description: >
        If the bug was not present on an earlier version and it's not trivial to track down: when did it start appearing?
        If possible, please do a git bisect and identify the exact commit that introduced the bug.
    validations:
      required: false
  - type: textarea
    id: logs
    attributes:
      label: Relevant log output
      description: >
        If applicable, please copy and paste any relevant log output, including any generated text.
        This will be automatically formatted into code, so no need for backticks.
      render: shell
    validations:
      required: false
llama.cpp/.github/ISSUE_TEMPLATE/020-enhancement.yml
ADDED
@@ -0,0 +1,51 @@
name: Enhancement
description: Used to request enhancements for llama.cpp.
title: "Feature Request: "
labels: ["enhancement"]
body:
  - type: markdown
    attributes:
      value: |
        [Please post your idea first in Discussion if there is not yet a consensus for this enhancement request. This will help to keep this issue tracker focused on enhancements that the community has agreed needs to be implemented.](https://github.com/ggml-org/llama.cpp/discussions/categories/ideas)

  - type: checkboxes
    id: prerequisites
    attributes:
      label: Prerequisites
      description: Please confirm the following before submitting your enhancement request.
      options:
        - label: I am running the latest code. Mention the version if possible as well.
          required: true
        - label: I carefully followed the [README.md](https://github.com/ggml-org/llama.cpp/blob/master/README.md).
          required: true
        - label: I searched using keywords relevant to my issue to make sure that I am creating a new issue that is not already open (or closed).
          required: true
        - label: I reviewed the [Discussions](https://github.com/ggml-org/llama.cpp/discussions), and have a new and useful enhancement to share.
          required: true

  - type: textarea
    id: feature-description
    attributes:
      label: Feature Description
      description: Please provide a detailed written description of what you were trying to do, and what you expected `llama.cpp` to do as an enhancement.
      placeholder: Detailed description of the enhancement
    validations:
      required: true

  - type: textarea
    id: motivation
    attributes:
      label: Motivation
      description: Please provide a detailed written description of reasons why this feature is necessary and how it is useful to `llama.cpp` users.
      placeholder: Explanation of why this feature is needed and its benefits
    validations:
      required: true

  - type: textarea
    id: possible-implementation
    attributes:
      label: Possible Implementation
      description: If you have an idea as to how it can be implemented, please write a detailed description. Feel free to give links to external sources or share visuals that might be helpful to understand the details better.
      placeholder: Detailed description of potential implementation
    validations:
      required: false
llama.cpp/.github/ISSUE_TEMPLATE/030-research.yml
ADDED
@@ -0,0 +1,52 @@
name: Research
description: Track new technical research area.
title: "Research: "
labels: ["research 🔬"]
body:
  - type: markdown
    attributes:
      value: |
        Don't forget to check for any [duplicate research issue tickets](https://github.com/ggml-org/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3A%22research+%F0%9F%94%AC%22)

  - type: checkboxes
    id: research-stage
    attributes:
      label: Research Stage
      description: Track general state of this research ticket
      options:
        - label: Background Research (Let's try to avoid reinventing the wheel)
        - label: Hypothesis Formed (How do you think this will work and it's effect?)
        - label: Strategy / Implementation Forming
        - label: Analysis of results
        - label: Debrief / Documentation (So people in the future can learn from us)

  - type: textarea
    id: background
    attributes:
      label: Previous existing literature and research
      description: Whats the current state of the art and whats the motivation for this research?

  - type: textarea
    id: hypothesis
    attributes:
      label: Hypothesis
      description: How do you think this will work and it's effect?

  - type: textarea
    id: implementation
    attributes:
      label: Implementation
      description: Got an approach? e.g. a PR ready to go?

  - type: textarea
    id: analysis
    attributes:
      label: Analysis
      description: How does the proposed implementation behave?

  - type: textarea
    id: logs
    attributes:
      label: Relevant log output
      description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
      render: shell
llama.cpp/.github/ISSUE_TEMPLATE/040-refactor.yml
ADDED
@@ -0,0 +1,28 @@
name: Refactor (Maintainers)
description: Used to track refactoring opportunities.
title: "Refactor: "
labels: ["refactor"]
body:
  - type: markdown
    attributes:
      value: |
        Don't forget to [check for existing refactor issue tickets](https://github.com/ggml-org/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3Arefactoring) in case it's already covered.
        Also you may want to check [Pull request refactor label as well](https://github.com/ggml-org/llama.cpp/pulls?q=is%3Aopen+is%3Apr+label%3Arefactoring) for duplicates too.

  - type: textarea
    id: background-description
    attributes:
      label: Background Description
      description: Please provide a detailed written description of the pain points you are trying to solve.
      placeholder: Detailed description behind your motivation to request refactor
    validations:
      required: true

  - type: textarea
    id: possible-approaches
    attributes:
      label: Possible Refactor Approaches
      description: If you have some idea of possible approaches to solve this problem. You may want to make it a todo list.
      placeholder: Your idea of possible refactoring opportunity/approaches
    validations:
      required: false
llama.cpp/.github/ISSUE_TEMPLATE/config.yml
ADDED
@@ -0,0 +1,11 @@
blank_issues_enabled: true
contact_links:
  - name: Got an idea?
    url: https://github.com/ggml-org/llama.cpp/discussions/categories/ideas
    about: Pop it there. It may then become an enhancement ticket.
  - name: Got a question?
    url: https://github.com/ggml-org/llama.cpp/discussions/categories/q-a
    about: Ask a question there!
  - name: Want to contribute?
    url: https://github.com/ggml-org/llama.cpp/wiki/contribute
    about: Head to the contribution guide page of the wiki for areas you can help with
llama.cpp/.github/actions/get-tag-name/action.yml
ADDED
@@ -0,0 +1,22 @@
name: "Determine tag name"
description: "Determine the tag name to use for a release"
outputs:
  name:
    description: "The name of the tag"
    value: ${{ steps.tag.outputs.name }}

runs:
  using: "composite"
  steps:
    - name: Determine tag name
      id: tag
      shell: bash
      run: |
        BUILD_NUMBER="$(git rev-list --count HEAD)"
        SHORT_HASH="$(git rev-parse --short=7 HEAD)"
        if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
          echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
        else
          SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
          echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
        fi
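
The composite action above resolves to a tag of the form b<commit count> on master and <branch>-b<commit count>-<short hash> on other branches. The same naming can be previewed locally with a short shell sketch, where BRANCH_NAME stands in for the env.BRANCH_NAME value the workflow provides:

    BUILD_NUMBER="$(git rev-list --count HEAD)"
    SHORT_HASH="$(git rev-parse --short=7 HEAD)"
    if [[ "${BRANCH_NAME}" == "master" ]]; then
        echo "b${BUILD_NUMBER}"
    else
        echo "$(echo "${BRANCH_NAME}" | tr '/' '-')-b${BUILD_NUMBER}-${SHORT_HASH}"
    fi
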
llama.cpp/.github/actions/windows-setup-cuda/action.yml
ADDED
@@ -0,0 +1,67 @@
name: "Windows - Setup CUDA Toolkit"
description: "Setup CUDA Toolkit for Windows"
inputs:
  cuda_version:
    description: "CUDA toolkit version"
    required: true

runs:
  using: "composite"
  steps:
    - name: Install Cuda Toolkit 11.7
      if: ${{ inputs.cuda_version == '11.7' }}
      shell: pwsh
      run: |
        mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
        choco install unzip -y
        curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-11.7.99-archive.zip"
        curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-11.7.99-archive.zip"
        curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-11.7.99-archive.zip"
        curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-11.7.4.6-archive.zip"
        curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-11.7.91-archive.zip"
        curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-11.7.91-archive.zip"
        curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-11.7.101-archive.zip"
        curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-11.7.91-archive.zip"
        unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
        xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cudart-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
        xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvcc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
        xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvrtc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
        xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libcublas-windows-x86_64-11.7.4.6-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
        xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvtx-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
        xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\visual_studio_integration-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
        xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvprof-windows-x86_64-11.7.101-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
        xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cccl-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
        echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
        echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
        echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
        echo "CUDA_PATH_V11_7=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8

    - name: Install Cuda Toolkit 12.4
      if: ${{ inputs.cuda_version == '12.4' }}
      shell: pwsh
      run: |
        mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
        choco install unzip -y
        curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-12.4.127-archive.zip"
        curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-12.4.131-archive.zip"
        curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-12.4.127-archive.zip"
        curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-12.4.5.8-archive.zip"
        curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-12.4.127-archive.zip"
        curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-12.4.127-archive.zip"
        curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-12.4.127-archive.zip"
        curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-12.4.127-archive.zip"
        curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-12.4.127-archive.zip"
        unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
        xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cudart-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
        xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvcc-windows-x86_64-12.4.131-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
        xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvrtc-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
        xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libcublas-windows-x86_64-12.4.5.8-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
        xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvtx-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
        xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_profiler_api-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
        xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\visual_studio_integration-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
        xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvprof-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
        xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cccl-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
        echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
        echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
        echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
        echo "CUDA_PATH_V12_4=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
llama.cpp/.github/actions/windows-setup-curl/action.yml
ADDED
@@ -0,0 +1,30 @@
name: 'Windows - Setup CURL'
description: 'Composite action, to be reused in other workflow'
inputs:
  curl_version:
    description: 'CURL version'
    required: false
    default: '8.6.0_6'
  architecture:
    description: 'Architecture of the libcurl to download'
    required: false
    default: 'win64'
outputs:
  curl_path:
    description: "Path to the downloaded libcurl"
    value: ${{ steps.get_libcurl.outputs.curl_path }}

runs:
  using: "composite"
  steps:
    - name: libCURL
      id: get_libcurl
      shell: powershell
      env:
        CURL_VERSION: ${{ inputs.curl_version }}
        ARCHITECTURE: ${{ inputs.architecture }}
      run: |
        curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-${env:ARCHITECTURE}-mingw.zip"
        mkdir $env:RUNNER_TEMP/libcurl
        tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl
        echo "curl_path=$env:RUNNER_TEMP/libcurl" >> $env:GITHUB_OUTPUT
llama.cpp/.github/labeler.yml
ADDED
@@ -0,0 +1,94 @@
# https://github.com/actions/labeler
Apple Metal:
  - changed-files:
      - any-glob-to-any-file:
          - ggml/include/ggml-metal.h
          - ggml/src/ggml-metal/**
          - README-metal.md
SYCL:
  - changed-files:
      - any-glob-to-any-file:
          - ggml/include/ggml-sycl.h
          - ggml/src/ggml-sycl/**
          - docs/backend/SYCL.md
          - examples/sycl/**
Nvidia GPU:
  - changed-files:
      - any-glob-to-any-file:
          - ggml/include/ggml-cuda.h
          - ggml/src/ggml-cuda/**
Vulkan:
  - changed-files:
      - any-glob-to-any-file:
          - ggml/include/ggml-vulkan.h
          - ggml/src/ggml-vulkan/**
documentation:
  - changed-files:
      - any-glob-to-any-file:
          - docs/**
          - media/**
testing:
  - changed-files:
      - any-glob-to-any-file:
          - tests/**
build:
  - changed-files:
      - any-glob-to-any-file:
          - cmake/**
          - CMakeLists.txt
          - CMakePresets.json
examples:
  - changed-files:
      - any-glob-to-any-file:
          - examples/**
          - tools/**
devops:
  - changed-files:
      - any-glob-to-any-file:
          - .devops/**
          - .github/**
          - ci/**
python:
  - changed-files:
      - any-glob-to-any-file:
          - "**/*.py"
          - requirements/**
          - gguf-py/**
          - .flake8
script:
  - changed-files:
      - any-glob-to-any-file:
          - scripts/**
android:
  - changed-files:
      - any-glob-to-any-file:
          - examples/llama.android/**
server:
  - changed-files:
      - any-glob-to-any-file:
          - tools/server/**
ggml:
  - changed-files:
      - any-glob-to-any-file:
          - ggml/**
nix:
  - changed-files:
      - any-glob-to-any-file:
          - "**/*.nix"
          - .github/workflows/nix-*.yml
          - .devops/nix/nixpkgs-instances.nix
embedding:
  - changed-files:
      - any-glob-to-any-file: examples/embedding/

Ascend NPU:
  - changed-files:
      - any-glob-to-any-file:
          - ggml/include/ggml-cann.h
          - ggml/src/ggml-cann/**
          - docs/backend/CANN.md
OpenCL:
  - changed-files:
      - any-glob-to-any-file:
          - ggml/include/ggml-opencl.h
          - ggml/src/ggml-opencl/**
llama.cpp/.github/pull_request_template.md
ADDED
@@ -0,0 +1 @@
*Make sure to read the [contributing guidelines](https://github.com/ggml-org/llama.cpp/blob/master/CONTRIBUTING.md) before submitting a PR*