Commit 8a87fa8 (verified) · committed by AIencoder
1 parent: 8e06386

Upload folder using huggingface_hub

This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. .gitattributes +114 -0
  2. .gitgnore +49 -0
  3. .gradio/certificate.pem +31 -0
  4. README.md +3 -9
  5. agents/__init__.py +1 -0
  6. agents/coder.py +23 -0
  7. agents/reviewer.py +27 -0
  8. app.py +194 -0
  9. assets/.gitkeep +1 -0
  10. assets/response.wav +0 -0
  11. llama.cpp/.clang-format +164 -0
  12. llama.cpp/.clang-tidy +27 -0
  13. llama.cpp/.devops/cann.Dockerfile +130 -0
  14. llama.cpp/.devops/cloud-v-pipeline +22 -0
  15. llama.cpp/.devops/cpu.Dockerfile +92 -0
  16. llama.cpp/.devops/cuda.Dockerfile +94 -0
  17. llama.cpp/.devops/intel.Dockerfile +95 -0
  18. llama.cpp/.devops/llama-cli-cann.Dockerfile +44 -0
  19. llama.cpp/.devops/llama-cpp-cuda.srpm.spec +83 -0
  20. llama.cpp/.devops/llama-cpp.srpm.spec +85 -0
  21. llama.cpp/.devops/musa.Dockerfile +101 -0
  22. llama.cpp/.devops/nix/apps.nix +21 -0
  23. llama.cpp/.devops/nix/devshells.nix +52 -0
  24. llama.cpp/.devops/nix/docker.nix +37 -0
  25. llama.cpp/.devops/nix/jetson-support.nix +39 -0
  26. llama.cpp/.devops/nix/nixpkgs-instances.nix +45 -0
  27. llama.cpp/.devops/nix/package-gguf-py.nix +36 -0
  28. llama.cpp/.devops/nix/package.nix +248 -0
  29. llama.cpp/.devops/nix/python-scripts.nix +66 -0
  30. llama.cpp/.devops/nix/scope.nix +41 -0
  31. llama.cpp/.devops/nix/sif.nix +27 -0
  32. llama.cpp/.devops/rocm.Dockerfile +113 -0
  33. llama.cpp/.devops/tools.sh +49 -0
  34. llama.cpp/.devops/vulkan.Dockerfile +89 -0
  35. llama.cpp/.dockerignore +20 -0
  36. llama.cpp/.ecrc +6 -0
  37. llama.cpp/.editorconfig +54 -0
  38. llama.cpp/.flake8 +18 -0
  39. llama.cpp/.github/ISSUE_TEMPLATE/010-bug-compilation.yml +87 -0
  40. llama.cpp/.github/ISSUE_TEMPLATE/011-bug-results.yml +101 -0
  41. llama.cpp/.github/ISSUE_TEMPLATE/019-bug-misc.yml +91 -0
  42. llama.cpp/.github/ISSUE_TEMPLATE/020-enhancement.yml +51 -0
  43. llama.cpp/.github/ISSUE_TEMPLATE/030-research.yml +52 -0
  44. llama.cpp/.github/ISSUE_TEMPLATE/040-refactor.yml +28 -0
  45. llama.cpp/.github/ISSUE_TEMPLATE/config.yml +11 -0
  46. llama.cpp/.github/actions/get-tag-name/action.yml +22 -0
  47. llama.cpp/.github/actions/windows-setup-cuda/action.yml +67 -0
  48. llama.cpp/.github/actions/windows-setup-curl/action.yml +30 -0
  49. llama.cpp/.github/labeler.yml +94 -0
  50. llama.cpp/.github/pull_request_template.md +1 -0
.gitattributes CHANGED
@@ -33,3 +33,117 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ llama.cpp/.vs/llama.cpp/CopilotIndices/17.14.786.1071/CodeChunks.db filter=lfs diff=lfs merge=lfs -text
37
+ llama.cpp/.vs/llama.cpp/CopilotIndices/17.14.786.1071/SemanticSymbols.db filter=lfs diff=lfs merge=lfs -text
38
+ llama.cpp/.vs/llama.cpp/FileContentIndex/fa8fa901-0eee-48bf-a604-aa4561f07b11.vsidx filter=lfs diff=lfs merge=lfs -text
39
+ llama.cpp/.vs/llama.cpp/v17/Browse.VC.db filter=lfs diff=lfs merge=lfs -text
40
+ llama.cpp/.vs/slnx.sqlite filter=lfs diff=lfs merge=lfs -text
41
+ llama.cpp/build/bin/ggml-base.dll filter=lfs diff=lfs merge=lfs -text
42
+ llama.cpp/build/bin/ggml-base.ilk filter=lfs diff=lfs merge=lfs -text
43
+ llama.cpp/build/bin/ggml-base.pdb filter=lfs diff=lfs merge=lfs -text
44
+ llama.cpp/build/bin/ggml-cpu-alderlake.dll filter=lfs diff=lfs merge=lfs -text
45
+ llama.cpp/build/bin/ggml-cpu-haswell.dll filter=lfs diff=lfs merge=lfs -text
46
+ llama.cpp/build/bin/ggml-cpu-icelake.dll filter=lfs diff=lfs merge=lfs -text
47
+ llama.cpp/build/bin/ggml-cpu-sandybridge.dll filter=lfs diff=lfs merge=lfs -text
48
+ llama.cpp/build/bin/ggml-cpu-sapphirerapids.dll filter=lfs diff=lfs merge=lfs -text
49
+ llama.cpp/build/bin/ggml-cpu-skylakex.dll filter=lfs diff=lfs merge=lfs -text
50
+ llama.cpp/build/bin/ggml-cpu-sse42.dll filter=lfs diff=lfs merge=lfs -text
51
+ llama.cpp/build/bin/ggml-cpu-x64.dll filter=lfs diff=lfs merge=lfs -text
52
+ llama.cpp/build/bin/ggml-cpu.dll filter=lfs diff=lfs merge=lfs -text
53
+ llama.cpp/build/bin/ggml-cpu.ilk filter=lfs diff=lfs merge=lfs -text
54
+ llama.cpp/build/bin/ggml-cpu.pdb filter=lfs diff=lfs merge=lfs -text
55
+ llama.cpp/build/bin/ggml-rpc.dll filter=lfs diff=lfs merge=lfs -text
56
+ llama.cpp/build/bin/ggml.ilk filter=lfs diff=lfs merge=lfs -text
57
+ llama.cpp/build/bin/ggml.pdb filter=lfs diff=lfs merge=lfs -text
58
+ llama.cpp/build/bin/libcurl-x64.dll filter=lfs diff=lfs merge=lfs -text
59
+ llama.cpp/build/bin/libomp140.x86_64.dll filter=lfs diff=lfs merge=lfs -text
60
+ llama.cpp/build/bin/llama-batched-bench.exe filter=lfs diff=lfs merge=lfs -text
61
+ llama.cpp/build/bin/llama-bench.exe filter=lfs diff=lfs merge=lfs -text
62
+ llama.cpp/build/bin/llama-cli.exe filter=lfs diff=lfs merge=lfs -text
63
+ llama.cpp/build/bin/llama-completion.exe filter=lfs diff=lfs merge=lfs -text
64
+ llama.cpp/build/bin/llama-fit-params.exe filter=lfs diff=lfs merge=lfs -text
65
+ llama.cpp/build/bin/llama-imatrix.exe filter=lfs diff=lfs merge=lfs -text
66
+ llama.cpp/build/bin/llama-mtmd-cli.exe filter=lfs diff=lfs merge=lfs -text
67
+ llama.cpp/build/bin/llama-perplexity.exe filter=lfs diff=lfs merge=lfs -text
68
+ llama.cpp/build/bin/llama-quantize.exe filter=lfs diff=lfs merge=lfs -text
69
+ llama.cpp/build/bin/llama-run.exe filter=lfs diff=lfs merge=lfs -text
70
+ llama.cpp/build/bin/llama-server.exe filter=lfs diff=lfs merge=lfs -text
71
+ llama.cpp/build/bin/llama-server.ilk filter=lfs diff=lfs merge=lfs -text
72
+ llama.cpp/build/bin/llama-server.pdb filter=lfs diff=lfs merge=lfs -text
73
+ llama.cpp/build/bin/llama-tokenize.exe filter=lfs diff=lfs merge=lfs -text
74
+ llama.cpp/build/bin/llama-tts.exe filter=lfs diff=lfs merge=lfs -text
75
+ llama.cpp/build/bin/llama.dll filter=lfs diff=lfs merge=lfs -text
76
+ llama.cpp/build/bin/llama.ilk filter=lfs diff=lfs merge=lfs -text
77
+ llama.cpp/build/bin/llama.pdb filter=lfs diff=lfs merge=lfs -text
78
+ llama.cpp/build/bin/mtmd.dll filter=lfs diff=lfs merge=lfs -text
79
+ llama.cpp/build/bin/mtmd.ilk filter=lfs diff=lfs merge=lfs -text
80
+ llama.cpp/build/bin/mtmd.pdb filter=lfs diff=lfs merge=lfs -text
81
+ llama.cpp/build/bin/rpc-server.exe filter=lfs diff=lfs merge=lfs -text
82
+ llama.cpp/build/CMakeFiles/4.1.0-rc4/CompilerIdC/CMakeCCompilerId.exe filter=lfs diff=lfs merge=lfs -text
83
+ llama.cpp/build/CMakeFiles/4.1.0-rc4/CompilerIdCXX/CMakeCXXCompilerId.exe filter=lfs diff=lfs merge=lfs -text
84
+ llama.cpp/build/common/CMakeFiles/common.dir/arg.cpp.obj filter=lfs diff=lfs merge=lfs -text
85
+ llama.cpp/build/common/CMakeFiles/common.dir/chat-parser.cpp.obj filter=lfs diff=lfs merge=lfs -text
86
+ llama.cpp/build/common/CMakeFiles/common.dir/chat.cpp.obj filter=lfs diff=lfs merge=lfs -text
87
+ llama.cpp/build/common/CMakeFiles/common.dir/common.cpp.obj filter=lfs diff=lfs merge=lfs -text
88
+ llama.cpp/build/common/CMakeFiles/common.dir/common.pdb filter=lfs diff=lfs merge=lfs -text
89
+ llama.cpp/build/common/CMakeFiles/common.dir/console.cpp.obj filter=lfs diff=lfs merge=lfs -text
90
+ llama.cpp/build/common/CMakeFiles/common.dir/json-partial.cpp.obj filter=lfs diff=lfs merge=lfs -text
91
+ llama.cpp/build/common/CMakeFiles/common.dir/json-schema-to-grammar.cpp.obj filter=lfs diff=lfs merge=lfs -text
92
+ llama.cpp/build/common/CMakeFiles/common.dir/log.cpp.obj filter=lfs diff=lfs merge=lfs -text
93
+ llama.cpp/build/common/CMakeFiles/common.dir/ngram-cache.cpp.obj filter=lfs diff=lfs merge=lfs -text
94
+ llama.cpp/build/common/CMakeFiles/common.dir/regex-partial.cpp.obj filter=lfs diff=lfs merge=lfs -text
95
+ llama.cpp/build/common/CMakeFiles/common.dir/sampling.cpp.obj filter=lfs diff=lfs merge=lfs -text
96
+ llama.cpp/build/common/CMakeFiles/common.dir/speculative.cpp.obj filter=lfs diff=lfs merge=lfs -text
97
+ llama.cpp/build/common/common.lib filter=lfs diff=lfs merge=lfs -text
98
+ llama.cpp/build/ggml/src/CMakeFiles/ggml-base.dir/ggml-backend.cpp.obj filter=lfs diff=lfs merge=lfs -text
99
+ llama.cpp/build/ggml/src/CMakeFiles/ggml-base.dir/ggml-opt.cpp.obj filter=lfs diff=lfs merge=lfs -text
100
+ llama.cpp/build/ggml/src/CMakeFiles/ggml-base.dir/ggml-quants.c.obj filter=lfs diff=lfs merge=lfs -text
101
+ llama.cpp/build/ggml/src/CMakeFiles/ggml-base.dir/ggml.c.obj filter=lfs diff=lfs merge=lfs -text
102
+ llama.cpp/build/ggml/src/CMakeFiles/ggml-base.dir/gguf.cpp.obj filter=lfs diff=lfs merge=lfs -text
103
+ llama.cpp/build/ggml/src/CMakeFiles/ggml-base.dir/vc140.pdb filter=lfs diff=lfs merge=lfs -text
104
+ llama.cpp/build/ggml/src/CMakeFiles/ggml-cpu.dir/ggml-cpu/arch/x86/quants.c.obj filter=lfs diff=lfs merge=lfs -text
105
+ llama.cpp/build/ggml/src/CMakeFiles/ggml-cpu.dir/ggml-cpu/arch/x86/repack.cpp.obj filter=lfs diff=lfs merge=lfs -text
106
+ llama.cpp/build/ggml/src/CMakeFiles/ggml-cpu.dir/ggml-cpu/binary-ops.cpp.obj filter=lfs diff=lfs merge=lfs -text
107
+ llama.cpp/build/ggml/src/CMakeFiles/ggml-cpu.dir/ggml-cpu/ggml-cpu.c.obj filter=lfs diff=lfs merge=lfs -text
108
+ llama.cpp/build/ggml/src/CMakeFiles/ggml-cpu.dir/ggml-cpu/ggml-cpu.cpp.obj filter=lfs diff=lfs merge=lfs -text
109
+ llama.cpp/build/ggml/src/CMakeFiles/ggml-cpu.dir/ggml-cpu/llamafile/sgemm.cpp.obj filter=lfs diff=lfs merge=lfs -text
110
+ llama.cpp/build/ggml/src/CMakeFiles/ggml-cpu.dir/ggml-cpu/ops.cpp.obj filter=lfs diff=lfs merge=lfs -text
111
+ llama.cpp/build/ggml/src/CMakeFiles/ggml-cpu.dir/ggml-cpu/quants.c.obj filter=lfs diff=lfs merge=lfs -text
112
+ llama.cpp/build/ggml/src/CMakeFiles/ggml-cpu.dir/ggml-cpu/repack.cpp.obj filter=lfs diff=lfs merge=lfs -text
113
+ llama.cpp/build/ggml/src/CMakeFiles/ggml-cpu.dir/ggml-cpu/unary-ops.cpp.obj filter=lfs diff=lfs merge=lfs -text
114
+ llama.cpp/build/ggml/src/CMakeFiles/ggml-cpu.dir/vc140.pdb filter=lfs diff=lfs merge=lfs -text
115
+ llama.cpp/build/ggml/src/CMakeFiles/ggml.dir/ggml-backend-reg.cpp.obj filter=lfs diff=lfs merge=lfs -text
116
+ llama.cpp/build/ggml/src/CMakeFiles/ggml.dir/vc140.pdb filter=lfs diff=lfs merge=lfs -text
117
+ llama.cpp/build/ggml/src/ggml-base.lib filter=lfs diff=lfs merge=lfs -text
118
+ llama.cpp/build/src/CMakeFiles/llama.dir/llama-adapter.cpp.obj filter=lfs diff=lfs merge=lfs -text
119
+ llama.cpp/build/src/CMakeFiles/llama.dir/llama-arch.cpp.obj filter=lfs diff=lfs merge=lfs -text
120
+ llama.cpp/build/src/CMakeFiles/llama.dir/llama-batch.cpp.obj filter=lfs diff=lfs merge=lfs -text
121
+ llama.cpp/build/src/CMakeFiles/llama.dir/llama-chat.cpp.obj filter=lfs diff=lfs merge=lfs -text
122
+ llama.cpp/build/src/CMakeFiles/llama.dir/llama-context.cpp.obj filter=lfs diff=lfs merge=lfs -text
123
+ llama.cpp/build/src/CMakeFiles/llama.dir/llama-grammar.cpp.obj filter=lfs diff=lfs merge=lfs -text
124
+ llama.cpp/build/src/CMakeFiles/llama.dir/llama-graph.cpp.obj filter=lfs diff=lfs merge=lfs -text
125
+ llama.cpp/build/src/CMakeFiles/llama.dir/llama-impl.cpp.obj filter=lfs diff=lfs merge=lfs -text
126
+ llama.cpp/build/src/CMakeFiles/llama.dir/llama-io.cpp.obj filter=lfs diff=lfs merge=lfs -text
127
+ llama.cpp/build/src/CMakeFiles/llama.dir/llama-kv-cache-unified-iswa.cpp.obj filter=lfs diff=lfs merge=lfs -text
128
+ llama.cpp/build/src/CMakeFiles/llama.dir/llama-kv-cache-unified.cpp.obj filter=lfs diff=lfs merge=lfs -text
129
+ llama.cpp/build/src/CMakeFiles/llama.dir/llama-memory-hybrid.cpp.obj filter=lfs diff=lfs merge=lfs -text
130
+ llama.cpp/build/src/CMakeFiles/llama.dir/llama-memory-recurrent.cpp.obj filter=lfs diff=lfs merge=lfs -text
131
+ llama.cpp/build/src/CMakeFiles/llama.dir/llama-mmap.cpp.obj filter=lfs diff=lfs merge=lfs -text
132
+ llama.cpp/build/src/CMakeFiles/llama.dir/llama-model-loader.cpp.obj filter=lfs diff=lfs merge=lfs -text
133
+ llama.cpp/build/src/CMakeFiles/llama.dir/llama-model-saver.cpp.obj filter=lfs diff=lfs merge=lfs -text
134
+ llama.cpp/build/src/CMakeFiles/llama.dir/llama-model.cpp.obj filter=lfs diff=lfs merge=lfs -text
135
+ llama.cpp/build/src/CMakeFiles/llama.dir/llama-quant.cpp.obj filter=lfs diff=lfs merge=lfs -text
136
+ llama.cpp/build/src/CMakeFiles/llama.dir/llama-sampling.cpp.obj filter=lfs diff=lfs merge=lfs -text
137
+ llama.cpp/build/src/CMakeFiles/llama.dir/llama-vocab.cpp.obj filter=lfs diff=lfs merge=lfs -text
138
+ llama.cpp/build/src/CMakeFiles/llama.dir/llama.cpp.obj filter=lfs diff=lfs merge=lfs -text
139
+ llama.cpp/build/src/CMakeFiles/llama.dir/unicode-data.cpp.obj filter=lfs diff=lfs merge=lfs -text
140
+ llama.cpp/build/src/CMakeFiles/llama.dir/unicode.cpp.obj filter=lfs diff=lfs merge=lfs -text
141
+ llama.cpp/build/src/CMakeFiles/llama.dir/vc140.pdb filter=lfs diff=lfs merge=lfs -text
142
+ llama.cpp/build/tools/mtmd/CMakeFiles/mtmd.dir/clip.cpp.obj filter=lfs diff=lfs merge=lfs -text
143
+ llama.cpp/build/tools/mtmd/CMakeFiles/mtmd.dir/mtmd-audio.cpp.obj filter=lfs diff=lfs merge=lfs -text
144
+ llama.cpp/build/tools/mtmd/CMakeFiles/mtmd.dir/mtmd-helper.cpp.obj filter=lfs diff=lfs merge=lfs -text
145
+ llama.cpp/build/tools/mtmd/CMakeFiles/mtmd.dir/mtmd.cpp.obj filter=lfs diff=lfs merge=lfs -text
146
+ llama.cpp/build/tools/mtmd/CMakeFiles/mtmd.dir/vc140.pdb filter=lfs diff=lfs merge=lfs -text
147
+ llama.cpp/build/tools/server/CMakeFiles/llama-server.dir/server.cpp.obj filter=lfs diff=lfs merge=lfs -text
148
+ llama.cpp/build/tools/server/CMakeFiles/llama-server.dir/vc140.pdb filter=lfs diff=lfs merge=lfs -text
149
+ llama.cpp/docs/development/llama-star/idea-arch.key filter=lfs diff=lfs merge=lfs -text
.gitgnore ADDED
@@ -0,0 +1,49 @@
+ # Python
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ *.so
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ pip-wheel-metadata/
+ share/python-wheels/
+ *.egg
+
+ # Virtual Environment
+ venv/
+ env/
+ ENV/
+ .venv/
+
+ # IDE & Editors
+ .idea/
+ *.swp
+ *.swo
+ .DS_Store
+ .vscode/
+ *.sublime-project
+ *.sublime-workspace
+
+ # Logs & Databases
+ *.log
+ *.sqlite3
+
+ # Generated by app
+ response.wav
+ sandbox/*.py
+ !sandbox/.gitkeep
+
+ # Hugging Face cache
+ ~/.cache/huggingface/
+ .gitattributes
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
README.md CHANGED
@@ -1,12 +1,6 @@
  ---
- title: AI Coding Genius
- emoji: 🏃
- colorFrom: pink
- colorTo: gray
- sdk: gradio
- sdk_version: 6.2.0
+ title: AI-Coding-Genius
  app_file: app.py
- pinned: false
+ sdk: gradio
+ sdk_version: 5.42.0
  ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
agents/__init__.py ADDED
@@ -0,0 +1 @@
+
agents/coder.py ADDED
@@ -0,0 +1,23 @@
+ # agents/coder.py
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+
+ class CoderAgent:
+     def __init__(self, model_name="deepseek-ai/deepseek-coder-6.7b-instruct"):
+         self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+         self.model = AutoModelForCausalLM.from_pretrained(
+             model_name,
+             torch_dtype="auto",
+             device_map="auto"
+         )
+
+     def generate(self, prompt):
+         full_prompt = f"""
+ You're a brilliant, friendly AI coder. Explain clearly and write clean Python.
+ Include comments and use best practices.
+
+ Task: {prompt}
+ """.strip()
+         inputs = self.tokenizer(full_prompt, return_tensors="pt").to(self.model.device)
+         outputs = self.model.generate(**inputs, max_new_tokens=1024, temperature=0.4)
+         code = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+         return code[len(full_prompt):].strip()
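
For orientation, a minimal usage sketch of the CoderAgent class added above. This is hypothetical and not part of the commit; it assumes the deepseek-ai/deepseek-coder-6.7b-instruct weights can be downloaded and fit in available memory.

    # Hypothetical usage of agents/coder.py (not wired into app.py in this commit)
    from agents.coder import CoderAgent

    coder = CoderAgent()  # downloads deepseek-ai/deepseek-coder-6.7b-instruct on first use
    snippet = coder.generate("Write a function that checks whether a string is a palindrome.")
    print(snippet)
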
agents/reviewer.py ADDED
@@ -0,0 +1,27 @@
+ # agents/reviewer.py
+ from transformers import pipeline
+
+ class ReviewerAgent:
+     def __init__(self, model_name="Qwen/Qwen2-7B-Instruct"):
+         self.pipe = pipeline(
+             "text-generation",
+             model=model_name,
+             torch_dtype="auto",
+             device_map="auto"
+         )
+
+     def review(self, code):
+         prompt = f"""
+ Review this Python code for:
+ - Bugs
+ - Performance
+ - Readability
+ - Best practices
+
+ Code:
+ {code}
+
+ Provide a clear, constructive review.
+ """
+         result = self.pipe(prompt, max_new_tokens=512)
+         return result[0]['generated_text']
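
A hedged sketch of how the two agents could be chained. Neither class is imported by app.py in this commit, so the snippet is illustrative only and assumes both model downloads succeed on hardware with enough memory.

    # Illustrative only: chain the committed CoderAgent and ReviewerAgent
    from agents.coder import CoderAgent
    from agents.reviewer import ReviewerAgent

    coder = CoderAgent()
    reviewer = ReviewerAgent()

    code = coder.generate("Implement binary search over a sorted list.")
    review = reviewer.review(code)  # generated_text includes the prompt plus the review
    print(review)
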
app.py ADDED
@@ -0,0 +1,194 @@
1
+ import os
2
+ import torch
3
+ import gradio as gr
4
+ from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech
5
+ from transformers import WhisperProcessor, WhisperForConditionalGeneration
6
+ import librosa
7
+ import soundfile as sf
8
+ import numpy as np
9
+ from llama_cpp import Llama
10
+ from huggingface_hub import hf_hub_download # Needed to get the model
11
+
12
+ # ─────────────────────────────────────────────────────────────
13
+ # 🧠 Load Qwen Coder (The Brain) - INSIDE Python now
14
+ # ─────────────────────────────────────────────────────────────
15
+ print("🧠 Downloading/Loading Qwen Model...")
16
+ try:
17
+ # 1. Download the model file from Hugging Face automatically
18
+ model_path = hf_hub_download(
19
+ repo_id="Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF",
20
+ filename="qwen2.5-coder-1.5b-instruct-q8_0.gguf"
21
+ )
22
+
23
+ # 2. Load it directly into memory
24
+ llm = Llama(
25
+ model_path=model_path,
26
+ n_ctx=4096, # Context window
27
+ n_threads=2, # Use 2 CPU threads (good for free tier)
28
+ verbose=False
29
+ )
30
+ print("✅ Qwen Model Loaded Successfully!")
31
+ llm_ready = True
32
+ except Exception as e:
33
+ print(f"❌ Failed to load Qwen: {e}")
34
+ llm_ready = False
35
+
36
+ # ─────────────────────────────────────────────────────────────
37
+ # 🔊 Load Text-to-Speech (TTS)
38
+ # ─────────────────────────────────────────────────────────────
39
+ print("🔊 Loading TTS model...")
40
+ try:
41
+ tts_processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
42
+ tts_model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
43
+ speaker_embeddings = torch.zeros(1, 512) # Safe fallback
44
+ tts_ready = True
45
+ print("✅ TTS loaded!")
46
+ except Exception as e:
47
+ print(f"❌ TTS failed to load: {e}")
48
+ tts_ready = False
49
+
50
+ # ─────────────────────────────────────────────────────────────
51
+ # 🎤 Load Speech-to-Text (STT) - Whisper Tiny
52
+ # ─────────────────────────────────────────────────────────────
53
+ print("🎤 Loading STT model (whisper-tiny)...")
54
+ try:
55
+ stt_processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
56
+ stt_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
57
+ stt_model.eval()
58
+ stt_ready = True
59
+ print("✅ STT loaded!")
60
+ except Exception as e:
61
+ print(f"❌ STT failed to load: {e}")
62
+ stt_ready = False
63
+
64
+ # Create folders
65
+ os.makedirs("assets", exist_ok=True)
66
+
67
+ # ─────────────────────────────────────────────────────────────
68
+ # 🎤 Convert Speech to Text
69
+ # ─────────────────────────────────────────────────────────────
70
+ def speech_to_text(audio):
71
+ if not stt_ready or audio is None:
72
+ return "Voice input not available."
73
+
74
+ try:
75
+ sample_rate, y = audio
76
+ if y.dtype != np.float32:
77
+ y = y.astype(np.float32) / 32768.0
78
+ if len(y.shape) > 1:
79
+ y = y.mean(axis=1)
80
+ if sample_rate != 16000:
81
+ y = librosa.resample(y, orig_sr=sample_rate, target_sr=16000)
82
+
83
+ inputs = stt_processor(y, sampling_rate=16000, return_tensors="pt")
84
+ outputs = stt_model.generate(inputs["input_features"])
85
+ text = stt_processor.batch_decode(outputs, skip_special_tokens=True)[0]
86
+ return text.strip()
87
+ except Exception as e:
88
+ return f"❌ STT Error: {str(e)}"
89
+
90
+ # ─────────────────────────────────────────────────────────────
91
+ # 💬 Generate Code (Now using Internal LLM)
92
+ # ─────────────────────────────────────────────────────────────
93
+ def generate_code(prompt):
94
+ if not prompt.strip():
95
+ yield "# 👋 Hello!", "Hi! I'm your AI coding partner.", None
96
+ return
97
+
98
+ if not llm_ready:
99
+ yield "# Error", "❌ Model failed to load. Check logs.", None
100
+ return
101
+
102
+ yield "# Thinking...", "🧠 AI is thinking...", None
103
+
104
+ try:
105
+ # Create the prompt in ChatML format
106
+ messages = [
107
+ {"role": "system", "content": "You are a helpful AI coder."},
108
+ {"role": "user", "content": prompt}
109
+ ]
110
+
111
+ # Ask the internal model to generate
112
+ output = llm.create_chat_completion(
113
+ messages=messages,
114
+ max_tokens=512,
115
+ temperature=0.4,
116
+ top_p=0.95,
117
+ stream=True
118
+ )
119
+
120
+ # Stream the response
121
+ raw_code = ""
122
+ for chunk in output:
123
+ if "content" in chunk["choices"][0]["delta"]:
124
+ text_chunk = chunk["choices"][0]["delta"]["content"]
125
+ raw_code += text_chunk
126
+ # Live update the code block
127
+ yield f"```python\n{raw_code}\n```", "🚀 Generating...", None
128
+
129
+ # Clean up code
130
+ clean_code = raw_code
131
+ if "```python" in clean_code:
132
+ clean_code = clean_code.split("```python")[1].split("```")[0].strip()
133
+ elif "```" in clean_code:
134
+ clean_code = clean_code.split("```")[1].split("```")[0].strip()
135
+
136
+ final_display = f"```python\n{clean_code}\n```"
137
+
138
+ # 🎙️ Generate voice
139
+ audio_path = None
140
+ if tts_ready:
141
+ try:
142
+ voice_text = f"Here is the code for {prompt[:20]}"
143
+ inputs_tts = tts_processor(text=voice_text, return_tensors="pt")
144
+ speech = tts_model.generate_speech(inputs_tts["input_ids"], speaker_embeddings)
145
+ audio_path = os.path.abspath("assets/response.wav")
146
+ sf.write(audio_path, speech.cpu().numpy(), samplerate=16000)
147
+ except Exception as e:
148
+ print(f"⚠️ TTS failed: {e}")
149
+
150
+ yield final_display, f"✅ Done!", audio_path
151
+
152
+ except Exception as e:
153
+ yield "print('Error')", f"❌ Error: {str(e)}", None
154
+
155
+ # ─────────────────────────────────────────────────────────────
156
+ # 💾 Save As Function
157
+ # ─────────────────────────────────────────────────────────────
158
+ def save_as_code(code, filename):
159
+ if not filename.strip():
160
+ filename = "ai_generated_code.py"
161
+ elif not filename.endswith(".py"):
162
+ filename += ".py"
163
+ try:
164
+ clean_code = code.replace("```python", "").replace("```", "").strip()
165
+ # In cloud, we just save to volatile memory, but this works for the demo
166
+ with open(filename, "w", encoding="utf-8") as f:
167
+ f.write(clean_code)
168
+ return f"💾 Saved (Temporary): {filename}"
169
+ except Exception as e:
170
+ return f"❌ Save failed: {str(e)}"
171
+
172
+ # ─────────────────────────────────────────────────────────────
173
+ # 🚀 Gradio UI
174
+ # ─────────────────────────────────────────────────────────────
175
+ with gr.Blocks(title="AI Coding Genius", theme=gr.themes.Soft()) as demo:
176
+ gr.Markdown("# 🤖 AI Coding Genius (Cloud Edition)")
177
+
178
+ with gr.Row():
179
+ stt_input = gr.Audio(label="🎤 Speak", type="numpy", format="wav")
180
+
181
+ inp = gr.Textbox(label="💬 Prompt", placeholder="Make a snake game...")
182
+ btn = gr.Button("🚀 Generate", variant="primary")
183
+
184
+ code_out = gr.Code(label="💻 Code", language="python", lines=15)
185
+ status_out = gr.Textbox(label="Status")
186
+ audio_out = gr.Audio(label="Voice", autoplay=True)
187
+
188
+ # Wiring
189
+ stt_input.change(speech_to_text, stt_input, inp)
190
+ btn.click(generate_code, inp, [code_out, status_out, audio_out])
191
+
192
+ if __name__ == "__main__":
193
+ # Removed share=True for Cloud deployment
194
+ demo.launch()
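
Since generate_code() above is a generator that yields (code_markdown, status, audio_path) tuples, it can be exercised without the Gradio UI. A rough sketch, assuming the module-level Qwen GGUF download and TTS/STT loads in app.py succeed (they run at import time):

    # Rough sketch: drive the streaming generator directly (no Gradio UI)
    from app import generate_code

    final_code, final_status, audio = None, None, None
    for code_md, status, audio_path in generate_code("Write FizzBuzz in Python"):
        final_code, final_status, audio = code_md, status, audio_path
        print(status)

    print(final_code)  # markdown-fenced Python code
    print(audio)       # path to assets/response.wav, or None if TTS was skipped or failed
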
assets/.gitkeep ADDED
@@ -0,0 +1 @@
+
assets/response.wav ADDED
Binary file (18 kB).
 
llama.cpp/.clang-format ADDED
@@ -0,0 +1,164 @@
1
+ ---
2
+ Language: Cpp
3
+ AlignAfterOpenBracket: Align
4
+ AlignArrayOfStructures: Left
5
+ AlignConsecutiveAssignments: AcrossComments
6
+ AlignConsecutiveBitFields: AcrossComments
7
+ AlignConsecutiveDeclarations: AcrossComments
8
+ AlignConsecutiveMacros: AcrossComments
9
+ # AlignConsecutiveShortCaseStatements: AcrossComments
10
+ AlignEscapedNewlines: Left # LeftWithLastLine
11
+ AlignOperands: Align
12
+ AlignTrailingComments:
13
+ Kind: Always
14
+ OverEmptyLines: 1
15
+ AllowAllArgumentsOnNextLine: true
16
+ AllowAllParametersOfDeclarationOnNextLine: false
17
+ # AllowBreakBeforeNoexceptSpecifier: OnlyWithParen
18
+ AllowShortBlocksOnASingleLine: Never
19
+ AllowShortCaseLabelsOnASingleLine: false
20
+ AllowShortFunctionsOnASingleLine: Inline
21
+ AllowShortIfStatementsOnASingleLine: Never
22
+ AllowShortLambdasOnASingleLine: Inline
23
+ AllowShortLoopsOnASingleLine: false
24
+ AlwaysBreakBeforeMultilineStrings: true
25
+ BinPackArguments: false
26
+ BinPackParameters: false # OnePerLine
27
+ BitFieldColonSpacing: Both
28
+ BreakBeforeBraces: Custom # Attach
29
+ BraceWrapping:
30
+ AfterCaseLabel: true
31
+ AfterClass: false
32
+ AfterControlStatement: false
33
+ AfterEnum: false
34
+ AfterFunction: false
35
+ AfterNamespace: false
36
+ AfterObjCDeclaration: false
37
+ AfterStruct: false
38
+ AfterUnion: false
39
+ AfterExternBlock: false
40
+ BeforeCatch: false
41
+ BeforeElse: false
42
+ BeforeLambdaBody: false
43
+ BeforeWhile: false
44
+ IndentBraces: false
45
+ SplitEmptyFunction: false
46
+ SplitEmptyRecord: false
47
+ SplitEmptyNamespace: false
48
+ # BreakAdjacentStringLiterals: true
49
+ BreakAfterAttributes: Never
50
+ BreakBeforeBinaryOperators: None
51
+ BreakBeforeInlineASMColon: OnlyMultiline
52
+ BreakBeforeTernaryOperators: false
53
+ # BreakBinaryOperations: Never
54
+ BreakConstructorInitializers: AfterColon
55
+ # BreakFunctionDefinitionParameters: false
56
+ BreakInheritanceList: AfterComma
57
+ BreakStringLiterals: true
58
+ # BreakTemplateDeclarations: Yes
59
+ ColumnLimit: 120
60
+ CommentPragmas: '^ IWYU pragma:'
61
+ CompactNamespaces: false
62
+ ConstructorInitializerIndentWidth: 4
63
+ ContinuationIndentWidth: 4
64
+ Cpp11BracedListStyle: false
65
+ DerivePointerAlignment: false
66
+ DisableFormat: false
67
+ EmptyLineBeforeAccessModifier: Leave
68
+ EmptyLineAfterAccessModifier: Never
69
+ ExperimentalAutoDetectBinPacking: false
70
+ FixNamespaceComments: true
71
+ IncludeBlocks: Regroup
72
+ IncludeCategories:
73
+ - Regex: '".*"'
74
+ Priority: 1
75
+ SortPriority: 0
76
+ - Regex: '^<.*\.h>'
77
+ Priority: 2
78
+ SortPriority: 0
79
+ - Regex: '^<.*'
80
+ Priority: 3
81
+ SortPriority: 0
82
+ - Regex: '.*'
83
+ Priority: 4
84
+ SortPriority: 0
85
+ IncludeIsMainRegex: '([-_](test|unittest))?$'
86
+ IncludeIsMainSourceRegex: ''
87
+ IndentAccessModifiers: false
88
+ IndentCaseBlocks: true
89
+ IndentCaseLabels: true
90
+ IndentExternBlock: NoIndent
91
+ IndentGotoLabels: false
92
+ IndentPPDirectives: AfterHash
93
+ IndentWidth: 4
94
+ IndentWrappedFunctionNames: false
95
+ InsertBraces: true # NOTE: may lead to incorrect formatting
96
+ InsertNewlineAtEOF: true
97
+ JavaScriptQuotes: Leave
98
+ JavaScriptWrapImports: true
99
+ KeepEmptyLinesAtTheStartOfBlocks: false
100
+ LambdaBodyIndentation: Signature
101
+ LineEnding: LF
102
+ MacroBlockBegin: ''
103
+ MacroBlockEnd: ''
104
+ MaxEmptyLinesToKeep: 1
105
+ NamespaceIndentation: None
106
+ ObjCBinPackProtocolList: Auto
107
+ ObjCBlockIndentWidth: 4
108
+ ObjCSpaceAfterProperty: true
109
+ ObjCSpaceBeforeProtocolList: true
110
+ PPIndentWidth: -1
111
+ PackConstructorInitializers: CurrentLine
112
+ PenaltyBreakAssignment: 2
113
+ PenaltyBreakBeforeFirstCallParameter: 1
114
+ PenaltyBreakComment: 300
115
+ PenaltyBreakFirstLessLess: 120
116
+ PenaltyBreakString: 1000
117
+ PenaltyBreakTemplateDeclaration: 10
118
+ PenaltyExcessCharacter: 1000000
119
+ PenaltyReturnTypeOnItsOwnLine: 200
120
+ PointerAlignment: Middle
121
+ QualifierAlignment: Left
122
+ #QualifierOrder: ['static', 'inline', 'friend', 'constexpr', 'const', 'volatile', 'type', 'restrict']
123
+ RawStringFormats:
124
+ - Language: Cpp
125
+ Delimiters:
126
+ - cc
127
+ - CC
128
+ - cpp
129
+ - Cpp
130
+ - CPP
131
+ - 'c++'
132
+ - 'C++'
133
+ CanonicalDelimiter: ''
134
+ ReferenceAlignment: Middle
135
+ ReflowComments: false # IndentOnly
136
+ SeparateDefinitionBlocks: Always
137
+ SortIncludes: CaseInsensitive
138
+ SortUsingDeclarations: LexicographicNumeric
139
+ SpaceAfterCStyleCast: true
140
+ SpaceAfterLogicalNot: false
141
+ SpaceAfterTemplateKeyword: true
142
+ SpaceBeforeAssignmentOperators: true
143
+ SpaceBeforeCpp11BracedList: false
144
+ SpaceBeforeCtorInitializerColon: true
145
+ SpaceBeforeInheritanceColon: true
146
+ SpaceBeforeParens: ControlStatements
147
+ SpaceBeforeRangeBasedForLoopColon: true
148
+ SpaceInEmptyBlock: false
149
+ SpaceInEmptyParentheses: false
150
+ SpacesBeforeTrailingComments: 2
151
+ SpacesInAngles: Never
152
+ SpacesInContainerLiterals: true
153
+ SpacesInLineCommentPrefix:
154
+ Minimum: 1
155
+ Maximum: -1
156
+ SpacesInParentheses: false
157
+ SpacesInSquareBrackets: false
158
+ SpaceBeforeSquareBrackets: false
159
+ Standard: c++17
160
+ TabWidth: 4
161
+ UseTab: Never
162
+ WhitespaceSensitiveMacros: ['STRINGIZE']
163
+ ...
164
+
llama.cpp/.clang-tidy ADDED
@@ -0,0 +1,27 @@
1
+ ---
2
+ Checks: >
3
+ bugprone-*,
4
+ -bugprone-easily-swappable-parameters,
5
+ -bugprone-implicit-widening-of-multiplication-result,
6
+ -bugprone-misplaced-widening-cast,
7
+ -bugprone-narrowing-conversions,
8
+ readability-*,
9
+ -readability-avoid-unconditional-preprocessor-if,
10
+ -readability-function-cognitive-complexity,
11
+ -readability-identifier-length,
12
+ -readability-implicit-bool-conversion,
13
+ -readability-magic-numbers,
14
+ -readability-uppercase-literal-suffix,
15
+ -readability-simplify-boolean-expr,
16
+ -readability-math-missing-parentheses,
17
+ clang-analyzer-*,
18
+ -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
19
+ performance-*,
20
+ portability-*,
21
+ -portability-simd-intrinsics,
22
+ misc-*,
23
+ -misc-const-correctness,
24
+ -misc-non-private-member-variables-in-classes,
25
+ -misc-no-recursion,
26
+ -misc-use-anonymous-namespace,
27
+ FormatStyle: none
llama.cpp/.devops/cann.Dockerfile ADDED
@@ -0,0 +1,130 @@
1
+ # ==============================================================================
2
+ # ARGUMENTS
3
+ # ==============================================================================
4
+
5
+ # Define the CANN base image for easier version updates later
6
+ ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.1.rc1-910b-openeuler22.03-py3.10
7
+
8
+ # ==============================================================================
9
+ # BUILD STAGE
10
+ # Compile all binary files and libraries
11
+ # ==============================================================================
12
+ FROM ${CANN_BASE_IMAGE} AS build
13
+
14
+ # Define the Ascend chip model for compilation. Default is Ascend910B3
15
+ ARG ASCEND_SOC_TYPE=Ascend910B3
16
+
17
+ # -- Install build dependencies --
18
+ RUN yum install -y gcc g++ cmake make git libcurl-devel python3 python3-pip && \
19
+ yum clean all && \
20
+ rm -rf /var/cache/yum
21
+
22
+ # -- Set the working directory --
23
+ WORKDIR /app
24
+
25
+ # -- Copy project files --
26
+ COPY . .
27
+
28
+ # -- Set CANN environment variables (required for compilation) --
29
+ # Using ENV instead of `source` allows environment variables to persist across the entire image layer
30
+ ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
31
+ ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${LD_LIBRARY_PATH}
32
+ ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${PATH}
33
+ ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
34
+ ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH
35
+ # ... You can add other environment variables from the original file as needed ...
36
+ # For brevity, only core variables are listed here. You can paste the original ENV list here.
37
+
38
+ # -- Build llama.cpp --
39
+ # Use the passed ASCEND_SOC_TYPE argument and add general build options
40
+ RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh --force \
41
+ && \
42
+ cmake -B build \
43
+ -DGGML_CANN=ON \
44
+ -DCMAKE_BUILD_TYPE=Release \
45
+ -DSOC_TYPE=${ASCEND_SOC_TYPE} \
46
+ . && \
47
+ cmake --build build --config Release -j$(nproc)
48
+
49
+ # -- Organize build artifacts for copying in later stages --
50
+ # Create a lib directory to store all .so files
51
+ RUN mkdir -p /app/lib && \
52
+ find build -name "*.so" -exec cp {} /app/lib \;
53
+
54
+ # Create a full directory to store all executables and Python scripts
55
+ RUN mkdir -p /app/full && \
56
+ cp build/bin/* /app/full/ && \
57
+ cp *.py /app/full/ && \
58
+ cp -r gguf-py /app/full/ && \
59
+ cp -r requirements /app/full/ && \
60
+ cp requirements.txt /app/full/
61
+ # If you have a tools.sh script, make sure it is copied here
62
+ # cp .devops/tools.sh /app/full/tools.sh
63
+
64
+ # ==============================================================================
65
+ # BASE STAGE
66
+ # Create a minimal base image with CANN runtime and common libraries
67
+ # ==============================================================================
68
+ FROM ${CANN_BASE_IMAGE} AS base
69
+
70
+ # -- Install runtime dependencies --
71
+ RUN yum install -y libgomp curl && \
72
+ yum clean all && \
73
+ rm -rf /var/cache/yum
74
+
75
+ # -- Set CANN environment variables (required for runtime) --
76
+ ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
77
+ ENV LD_LIBRARY_PATH=/app:${ASCEND_TOOLKIT_HOME}/lib64:${LD_LIBRARY_PATH}
78
+ ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${PATH}
79
+ ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
80
+ # ... You can add other environment variables from the original file as needed ...
81
+
82
+ WORKDIR /app
83
+
84
+ # Copy compiled .so files from the build stage
85
+ COPY --from=build /app/lib/ /app
86
+
87
+ # ==============================================================================
88
+ # FINAL STAGES (TARGETS)
89
+ # ==============================================================================
90
+
91
+ ### Target: full
92
+ # Complete image with all tools, Python bindings, and dependencies
93
+ # ==============================================================================
94
+ FROM base AS full
95
+
96
+ COPY --from=build /app/full /app
97
+
98
+ # Install Python dependencies
99
+ RUN yum install -y git python3 python3-pip && \
100
+ pip3 install --no-cache-dir --upgrade pip setuptools wheel && \
101
+ pip3 install --no-cache-dir -r requirements.txt && \
102
+ yum clean all && \
103
+ rm -rf /var/cache/yum
104
+
105
+ # You need to provide a tools.sh script as the entrypoint
106
+ ENTRYPOINT ["/app/tools.sh"]
107
+ # If there is no tools.sh, you can set the default to start the server
108
+ # ENTRYPOINT ["/app/llama-server"]
109
+
110
+ ### Target: light
111
+ # Lightweight image containing only llama-cli
112
+ # ==============================================================================
113
+ FROM base AS light
114
+
115
+ COPY --from=build /app/full/llama-cli /app
116
+
117
+ ENTRYPOINT [ "/app/llama-cli" ]
118
+
119
+ ### Target: server
120
+ # Dedicated server image containing only llama-server
121
+ # ==============================================================================
122
+ FROM base AS server
123
+
124
+ ENV LLAMA_ARG_HOST=0.0.0.0
125
+
126
+ COPY --from=build /app/full/llama-server /app
127
+
128
+ HEALTHCHECK --interval=5m CMD [ "curl", "-f", "http://localhost:8080/health" ]
129
+
130
+ ENTRYPOINT [ "/app/llama-server" ]
llama.cpp/.devops/cloud-v-pipeline ADDED
@@ -0,0 +1,22 @@
1
+ node('x86_runner1'){ // Running on x86 runner containing latest vector qemu, latest vector gcc and all the necessary libraries
2
+ stage('Cleanup'){
3
+ cleanWs() // Cleaning previous CI build in workspace
4
+ }
5
+ stage('checkout repo'){
6
+ retry(5){ // Retry if the cloning fails due to some reason
7
+ checkout scm // Clone the repo on Runner
8
+ }
9
+ }
10
+ stage('Compiling llama.cpp'){
11
+ sh'''#!/bin/bash
12
+ make RISCV=1 RISCV_CROSS_COMPILE=1 # Compiling llama for RISC-V
13
+ '''
14
+ }
15
+ stage('Running llama.cpp'){
16
+ sh'''#!/bin/bash
17
+ module load gnu-bin2/0.1 # loading latest versions of vector qemu and vector gcc
18
+ qemu-riscv64 -L /softwares/gnu-bin2/sysroot -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 ./llama-cli -m /home/alitariq/codellama-7b.Q4_K_M.gguf -p "Anything" -n 9 > llama_log.txt # Running llama.cpp on vector qemu-riscv64
19
+ cat llama_log.txt # Printing results
20
+ '''
21
+ }
22
+ }
llama.cpp/.devops/cpu.Dockerfile ADDED
@@ -0,0 +1,92 @@
1
+ ARG UBUNTU_VERSION=22.04
2
+
3
+ FROM ubuntu:$UBUNTU_VERSION AS build
4
+
5
+ ARG TARGETARCH
6
+
7
+ ARG GGML_CPU_ARM_ARCH=armv8-a
8
+
9
+ RUN apt-get update && \
10
+ apt-get install -y build-essential git cmake libcurl4-openssl-dev
11
+
12
+ WORKDIR /app
13
+
14
+ COPY . .
15
+
16
+ RUN if [ "$TARGETARCH" = "amd64" ]; then \
17
+ cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
18
+ elif [ "$TARGETARCH" = "arm64" ]; then \
19
+ cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \
20
+ else \
21
+ echo "Unsupported architecture"; \
22
+ exit 1; \
23
+ fi && \
24
+ cmake --build build -j $(nproc)
25
+
26
+ RUN mkdir -p /app/lib && \
27
+ find build -name "*.so" -exec cp {} /app/lib \;
28
+
29
+ RUN mkdir -p /app/full \
30
+ && cp build/bin/* /app/full \
31
+ && cp *.py /app/full \
32
+ && cp -r gguf-py /app/full \
33
+ && cp -r requirements /app/full \
34
+ && cp requirements.txt /app/full \
35
+ && cp .devops/tools.sh /app/full/tools.sh
36
+
37
+ ## Base image
38
+ FROM ubuntu:$UBUNTU_VERSION AS base
39
+
40
+ RUN apt-get update \
41
+ && apt-get install -y libgomp1 curl\
42
+ && apt autoremove -y \
43
+ && apt clean -y \
44
+ && rm -rf /tmp/* /var/tmp/* \
45
+ && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
46
+ && find /var/cache -type f -delete
47
+
48
+ COPY --from=build /app/lib/ /app
49
+
50
+ ### Full
51
+ FROM base AS full
52
+
53
+ COPY --from=build /app/full /app
54
+
55
+ WORKDIR /app
56
+
57
+ RUN apt-get update \
58
+ && apt-get install -y \
59
+ git \
60
+ python3 \
61
+ python3-pip \
62
+ && pip install --upgrade pip setuptools wheel \
63
+ && pip install -r requirements.txt \
64
+ && apt autoremove -y \
65
+ && apt clean -y \
66
+ && rm -rf /tmp/* /var/tmp/* \
67
+ && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
68
+ && find /var/cache -type f -delete
69
+
70
+ ENTRYPOINT ["/app/tools.sh"]
71
+
72
+ ### Light, CLI only
73
+ FROM base AS light
74
+
75
+ COPY --from=build /app/full/llama-cli /app
76
+
77
+ WORKDIR /app
78
+
79
+ ENTRYPOINT [ "/app/llama-cli" ]
80
+
81
+ ### Server, Server only
82
+ FROM base AS server
83
+
84
+ ENV LLAMA_ARG_HOST=0.0.0.0
85
+
86
+ COPY --from=build /app/full/llama-server /app
87
+
88
+ WORKDIR /app
89
+
90
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
91
+
92
+ ENTRYPOINT [ "/app/llama-server" ]
llama.cpp/.devops/cuda.Dockerfile ADDED
@@ -0,0 +1,94 @@
1
+ ARG UBUNTU_VERSION=22.04
2
+ # This needs to generally match the container host's environment.
3
+ ARG CUDA_VERSION=12.4.0
4
+ # Target the CUDA build image
5
+ ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
6
+
7
+ ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
8
+
9
+ FROM ${BASE_CUDA_DEV_CONTAINER} AS build
10
+
11
+ # CUDA architecture to build for (defaults to all supported archs)
12
+ ARG CUDA_DOCKER_ARCH=default
13
+
14
+ RUN apt-get update && \
15
+ apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1
16
+
17
+ WORKDIR /app
18
+
19
+ COPY . .
20
+
21
+ RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
22
+ export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
23
+ fi && \
24
+ cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
25
+ cmake --build build --config Release -j$(nproc)
26
+
27
+ RUN mkdir -p /app/lib && \
28
+ find build -name "*.so" -exec cp {} /app/lib \;
29
+
30
+ RUN mkdir -p /app/full \
31
+ && cp build/bin/* /app/full \
32
+ && cp *.py /app/full \
33
+ && cp -r gguf-py /app/full \
34
+ && cp -r requirements /app/full \
35
+ && cp requirements.txt /app/full \
36
+ && cp .devops/tools.sh /app/full/tools.sh
37
+
38
+ ## Base image
39
+ FROM ${BASE_CUDA_RUN_CONTAINER} AS base
40
+
41
+ RUN apt-get update \
42
+ && apt-get install -y libgomp1 curl\
43
+ && apt autoremove -y \
44
+ && apt clean -y \
45
+ && rm -rf /tmp/* /var/tmp/* \
46
+ && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
47
+ && find /var/cache -type f -delete
48
+
49
+ COPY --from=build /app/lib/ /app
50
+
51
+ ### Full
52
+ FROM base AS full
53
+
54
+ COPY --from=build /app/full /app
55
+
56
+ WORKDIR /app
57
+
58
+ RUN apt-get update \
59
+ && apt-get install -y \
60
+ git \
61
+ python3 \
62
+ python3-pip \
63
+ && pip install --upgrade pip setuptools wheel \
64
+ && pip install -r requirements.txt \
65
+ && apt autoremove -y \
66
+ && apt clean -y \
67
+ && rm -rf /tmp/* /var/tmp/* \
68
+ && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
69
+ && find /var/cache -type f -delete
70
+
71
+
72
+ ENTRYPOINT ["/app/tools.sh"]
73
+
74
+ ### Light, CLI only
75
+ FROM base AS light
76
+
77
+ COPY --from=build /app/full/llama-cli /app
78
+
79
+ WORKDIR /app
80
+
81
+ ENTRYPOINT [ "/app/llama-cli" ]
82
+
83
+ ### Server, Server only
84
+ FROM base AS server
85
+
86
+ ENV LLAMA_ARG_HOST=0.0.0.0
87
+
88
+ COPY --from=build /app/full/llama-server /app
89
+
90
+ WORKDIR /app
91
+
92
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
93
+
94
+ ENTRYPOINT [ "/app/llama-server" ]
llama.cpp/.devops/intel.Dockerfile ADDED
@@ -0,0 +1,95 @@
1
+ ARG ONEAPI_VERSION=2025.1.1-0-devel-ubuntu24.04
2
+
3
+ ## Build Image
4
+
5
+ FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build
6
+
7
+ ARG GGML_SYCL_F16=OFF
8
+ RUN apt-get update && \
9
+ apt-get install -y git libcurl4-openssl-dev
10
+
11
+ WORKDIR /app
12
+
13
+ COPY . .
14
+
15
+ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
16
+ echo "GGML_SYCL_F16 is set" \
17
+ && export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
18
+ fi && \
19
+ echo "Building with dynamic libs" && \
20
+ cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${OPT_SYCL_F16} && \
21
+ cmake --build build --config Release -j$(nproc)
22
+
23
+ RUN mkdir -p /app/lib && \
24
+ find build -name "*.so" -exec cp {} /app/lib \;
25
+
26
+ RUN mkdir -p /app/full \
27
+ && cp build/bin/* /app/full \
28
+ && cp *.py /app/full \
29
+ && cp -r gguf-py /app/full \
30
+ && cp -r requirements /app/full \
31
+ && cp requirements.txt /app/full \
32
+ && cp .devops/tools.sh /app/full/tools.sh
33
+
34
+ FROM intel/oneapi-basekit:$ONEAPI_VERSION AS base
35
+
36
+ RUN apt-get update \
37
+ && apt-get install -y libgomp1 curl\
38
+ && apt autoremove -y \
39
+ && apt clean -y \
40
+ && rm -rf /tmp/* /var/tmp/* \
41
+ && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
42
+ && find /var/cache -type f -delete
43
+
44
+ ### Full
45
+ FROM base AS full
46
+
47
+ COPY --from=build /app/lib/ /app
48
+ COPY --from=build /app/full /app
49
+
50
+ WORKDIR /app
51
+
52
+ RUN apt-get update && \
53
+ apt-get install -y \
54
+ git \
55
+ python3 \
56
+ python3-pip \
57
+ python3-venv && \
58
+ python3 -m venv /opt/venv && \
59
+ . /opt/venv/bin/activate && \
60
+ pip install --upgrade pip setuptools wheel && \
61
+ pip install -r requirements.txt && \
62
+ apt autoremove -y && \
63
+ apt clean -y && \
64
+ rm -rf /tmp/* /var/tmp/* && \
65
+ find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
66
+ find /var/cache -type f -delete
67
+
68
+ ENV PATH="/opt/venv/bin:$PATH"
69
+
70
+ ENTRYPOINT ["/app/tools.sh"]
71
+
72
+ ### Light, CLI only
73
+ FROM base AS light
74
+
75
+ COPY --from=build /app/lib/ /app
76
+ COPY --from=build /app/full/llama-cli /app
77
+
78
+ WORKDIR /app
79
+
80
+ ENTRYPOINT [ "/app/llama-cli" ]
81
+
82
+ ### Server, Server only
83
+ FROM base AS server
84
+
85
+ ENV LLAMA_ARG_HOST=0.0.0.0
86
+
87
+ COPY --from=build /app/lib/ /app
88
+ COPY --from=build /app/full/llama-server /app
89
+
90
+ WORKDIR /app
91
+
92
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
93
+
94
+ ENTRYPOINT [ "/app/llama-server" ]
95
+
llama.cpp/.devops/llama-cli-cann.Dockerfile ADDED
@@ -0,0 +1,44 @@
1
+ ARG ASCEND_VERSION=8.1.RC1.alpha001-910b-openeuler22.03-py3.10
2
+
3
+ FROM ascendai/cann:$ASCEND_VERSION AS build
4
+
5
+ WORKDIR /app
6
+
7
+ COPY . .
8
+
9
+ RUN yum install -y gcc g++ cmake make libcurl-devel
10
+ ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
11
+ ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
12
+ ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
13
+ ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
14
+ ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
15
+ ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
16
+ ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
17
+ ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
18
+ ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}
19
+
20
+ # find libascend_hal.so, because the drive hasn`t been mounted.
21
+ ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH
22
+
23
+ RUN echo "Building with static libs" && \
24
+ source /usr/local/Ascend/ascend-toolkit/set_env.sh --force && \
25
+ cmake -B build -DGGML_NATIVE=OFF -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_TESTS=OFF && \
26
+ cmake --build build --config Release --target llama-cli
27
+
28
+ # TODO: use image with NNRT
29
+ FROM ascendai/cann:$ASCEND_VERSION AS runtime
30
+ COPY --from=build /app/build/bin/llama-cli /llama-cli
31
+
32
+ ENV LC_ALL=C.utf8
33
+
34
+ ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
35
+ ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
36
+ ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
37
+ ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
38
+ ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
39
+ ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
40
+ ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
41
+ ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
42
+ ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}
43
+
44
+ ENTRYPOINT ["/llama-cli" ]
llama.cpp/.devops/llama-cpp-cuda.srpm.spec ADDED
@@ -0,0 +1,83 @@
1
+ # SRPM for building from source and packaging an RPM for RPM-based distros.
2
+ # https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
3
+ # Built and maintained by John Boero - boeroboy@gmail.com
4
+ # In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
5
+
6
+ # Notes for llama.cpp:
7
+ # 1. Tags are currently based on hash - which will not sort asciibetically.
8
+ # We need to declare standard versioning if people want to sort latest releases.
9
+ # 2. Builds for CUDA/OpenCL support are separate, with different depenedencies.
10
+ # 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
11
+ # Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
12
+ # 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
13
+ # It is up to the user to install the correct vendor-specific support.
14
+
15
+ Name: llama.cpp-cuda
16
+ Version: %( date "+%%Y%%m%%d" )
17
+ Release: 1%{?dist}
18
+ Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
19
+ License: MIT
20
+ Source0: https://github.com/ggml-org/llama.cpp/archive/refs/heads/master.tar.gz
21
+ BuildRequires: coreutils make gcc-c++ git cuda-toolkit
22
+ Requires: cuda-toolkit
23
+ URL: https://github.com/ggml-org/llama.cpp
24
+
25
+ %define debug_package %{nil}
26
+ %define source_date_epoch_from_changelog 0
27
+
28
+ %description
29
+ CPU inference for Meta's Lllama2 models using default options.
30
+
31
+ %prep
32
+ %setup -n llama.cpp-master
33
+
34
+ %build
35
+ make -j GGML_CUDA=1
36
+
37
+ %install
38
+ mkdir -p %{buildroot}%{_bindir}/
39
+ cp -p llama-cli %{buildroot}%{_bindir}/llama-cuda-cli
40
+ cp -p llama-server %{buildroot}%{_bindir}/llama-cuda-server
41
+ cp -p llama-simple %{buildroot}%{_bindir}/llama-cuda-simple
42
+
43
+ mkdir -p %{buildroot}/usr/lib/systemd/system
44
+ %{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamacuda.service
45
+ [Unit]
46
+ Description=Llama.cpp server, CPU only (no GPU support in this build).
47
+ After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target
48
+
49
+ [Service]
50
+ Type=simple
51
+ EnvironmentFile=/etc/sysconfig/llama
52
+ ExecStart=/usr/bin/llama-cuda-server $LLAMA_ARGS
53
+ ExecReload=/bin/kill -s HUP $MAINPID
54
+ Restart=never
55
+
56
+ [Install]
57
+ WantedBy=default.target
58
+ EOF
59
+
60
+ mkdir -p %{buildroot}/etc/sysconfig
61
+ %{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
62
+ LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
63
+ EOF
64
+
65
+ %clean
66
+ rm -rf %{buildroot}
67
+ rm -rf %{_builddir}/*
68
+
69
+ %files
70
+ %{_bindir}/llama-cuda-cli
71
+ %{_bindir}/llama-cuda-server
72
+ %{_bindir}/llama-cuda-simple
73
+ /usr/lib/systemd/system/llamacuda.service
74
+ %config /etc/sysconfig/llama
75
+
76
+ %pre
77
+
78
+ %post
79
+
80
+ %preun
81
+ %postun
82
+
83
+ %changelog
llama.cpp/.devops/llama-cpp.srpm.spec ADDED
@@ -0,0 +1,85 @@
1
+ # SRPM for building from source and packaging an RPM for RPM-based distros.
2
+ # https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
3
+ # Built and maintained by John Boero - boeroboy@gmail.com
4
+ # In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
5
+
6
+ # Notes for llama.cpp:
7
+ # 1. Tags are currently based on hash - which will not sort asciibetically.
8
+ # We need to declare standard versioning if people want to sort latest releases.
9
+ # In the meantime, YYYYMMDD format will be used.
10
+ # 2. Builds for CUDA/OpenCL support are separate, with different depenedencies.
11
+ # 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
12
+ # Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
13
+ # 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
14
+ # It is up to the user to install the correct vendor-specific support.
15
+
16
+ Name: llama.cpp
17
+ Version: %( date "+%%Y%%m%%d" )
18
+ Release: 1%{?dist}
19
+ Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
20
+ License: MIT
21
+ Source0: https://github.com/ggml-org/llama.cpp/archive/refs/heads/master.tar.gz
22
+ BuildRequires: coreutils make gcc-c++ git libstdc++-devel
23
+ Requires: libstdc++
24
+ URL: https://github.com/ggml-org/llama.cpp
25
+
26
+ %define debug_package %{nil}
27
+ %define source_date_epoch_from_changelog 0
28
+
29
+ %description
30
+ CPU inference for Meta's LLaMA 2 models using default options.
31
+ Models are not included in this package and must be downloaded separately.
32
+
33
+ %prep
34
+ %setup -n llama.cpp-master
35
+
36
+ %build
37
+ make -j
38
+
39
+ %install
40
+ mkdir -p %{buildroot}%{_bindir}/
41
+ cp -p llama-cli %{buildroot}%{_bindir}/llama-cli
42
+ cp -p llama-server %{buildroot}%{_bindir}/llama-server
43
+ cp -p llama-simple %{buildroot}%{_bindir}/llama-simple
44
+
45
+ mkdir -p %{buildroot}/usr/lib/systemd/system
46
+ %{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llama.service
47
+ [Unit]
48
+ Description=Llama.cpp server, CPU only (no GPU support in this build).
49
+ After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target
50
+
51
+ [Service]
52
+ Type=simple
53
+ EnvironmentFile=/etc/sysconfig/llama
54
+ ExecStart=/usr/bin/llama-server $LLAMA_ARGS
55
+ ExecReload=/bin/kill -s HUP $MAINPID
56
+ Restart=never
57
+
58
+ [Install]
59
+ WantedBy=default.target
60
+ EOF
61
+
62
+ mkdir -p %{buildroot}/etc/sysconfig
63
+ %{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
64
+ LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
65
+ EOF
66
+
67
+ %clean
68
+ rm -rf %{buildroot}
69
+ rm -rf %{_builddir}/*
70
+
71
+ %files
72
+ %{_bindir}/llama-cli
73
+ %{_bindir}/llama-server
74
+ %{_bindir}/llama-simple
75
+ /usr/lib/systemd/system/llama.service
76
+ %config /etc/sysconfig/llama
77
+
78
+ %pre
79
+
80
+ %post
81
+
82
+ %preun
83
+ %postun
84
+
85
+ %changelog
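
Once the resulting RPM is installed, the packaged systemd unit and sysconfig file above can be used directly. A minimal sketch (the model path preconfigured in /etc/sysconfig/llama is only a placeholder and should be pointed at a real model first):

    sudo systemctl daemon-reload
    sudo systemctl enable --now llama.service
    journalctl -u llama.service -f    # follow the server log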
llama.cpp/.devops/musa.Dockerfile ADDED
@@ -0,0 +1,101 @@
1
+ ARG UBUNTU_VERSION=22.04
2
+ # This needs to generally match the container host's environment.
3
+ ARG MUSA_VERSION=rc4.2.0
4
+ # Target the MUSA build image
5
+ ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}-amd64
6
+
7
+ ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}-amd64
8
+
9
+ FROM ${BASE_MUSA_DEV_CONTAINER} AS build
10
+
11
+ # MUSA architecture to build for (defaults to all supported archs)
12
+ ARG MUSA_DOCKER_ARCH=default
13
+
14
+ RUN apt-get update && \
15
+ apt-get install -y \
16
+ build-essential \
17
+ cmake \
18
+ python3 \
19
+ python3-pip \
20
+ git \
21
+ libcurl4-openssl-dev \
22
+ libgomp1
23
+
24
+ WORKDIR /app
25
+
26
+ COPY . .
27
+
28
+ RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
29
+ export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
30
+ fi && \
31
+ cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
32
+ cmake --build build --config Release -j$(nproc)
33
+
34
+ RUN mkdir -p /app/lib && \
35
+ find build -name "*.so" -exec cp {} /app/lib \;
36
+
37
+ RUN mkdir -p /app/full \
38
+ && cp build/bin/* /app/full \
39
+ && cp *.py /app/full \
40
+ && cp -r gguf-py /app/full \
41
+ && cp -r requirements /app/full \
42
+ && cp requirements.txt /app/full \
43
+ && cp .devops/tools.sh /app/full/tools.sh
44
+
45
+ ## Base image
46
+ FROM ${BASE_MUSA_RUN_CONTAINER} AS base
47
+
48
+ RUN apt-get update \
49
+ && apt-get install -y libgomp1 curl\
50
+ && apt autoremove -y \
51
+ && apt clean -y \
52
+ && rm -rf /tmp/* /var/tmp/* \
53
+ && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
54
+ && find /var/cache -type f -delete
55
+
56
+ COPY --from=build /app/lib/ /app
57
+
58
+ ### Full
59
+ FROM base AS full
60
+
61
+ COPY --from=build /app/full /app
62
+
63
+ WORKDIR /app
64
+
65
+ RUN apt-get update \
66
+ && apt-get install -y \
67
+ git \
68
+ python3 \
69
+ python3-pip \
70
+ && pip install --upgrade pip setuptools wheel \
71
+ && pip install -r requirements.txt \
72
+ && apt autoremove -y \
73
+ && apt clean -y \
74
+ && rm -rf /tmp/* /var/tmp/* \
75
+ && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
76
+ && find /var/cache -type f -delete
77
+
78
+
79
+ ENTRYPOINT ["/app/tools.sh"]
80
+
81
+ ### Light, CLI only
82
+ FROM base AS light
83
+
84
+ COPY --from=build /app/full/llama-cli /app
85
+
86
+ WORKDIR /app
87
+
88
+ ENTRYPOINT [ "/app/llama-cli" ]
89
+
90
+ ### Server, Server only
91
+ FROM base AS server
92
+
93
+ ENV LLAMA_ARG_HOST=0.0.0.0
94
+
95
+ COPY --from=build /app/full/llama-server /app
96
+
97
+ WORKDIR /app
98
+
99
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
100
+
101
+ ENTRYPOINT [ "/app/llama-server" ]
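
The Dockerfile above produces full, light, and server stages. A typical invocation, run from the repository root (the image tag and model paths are only examples, and extra flags to expose MUSA devices to the container may be required on your host):

    docker build -f .devops/musa.Dockerfile --target server -t llama-cpp:server-musa .
    docker run -p 8080:8080 -v /path/to/models:/models llama-cpp:server-musa -m /models/model.gguf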
llama.cpp/.devops/nix/apps.nix ADDED
@@ -0,0 +1,21 @@
1
+ {
2
+ perSystem =
3
+ { config, lib, ... }:
4
+ {
5
+ apps =
6
+ let
7
+ inherit (config.packages) default;
8
+ binaries = [
9
+ "llama-cli"
10
+ "llama-embedding"
11
+ "llama-server"
12
+ "llama-quantize"
13
+ ];
14
+ mkApp = name: {
15
+ type = "app";
16
+ program = "${default}/bin/${name}";
17
+ };
18
+ in
19
+ lib.genAttrs binaries mkApp;
20
+ };
21
+ }
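
With these apps defined, the binaries can be started straight from the flake. A small sketch, assuming a local checkout and a model file of your own:

    nix run .#llama-cli -- -m /path/to/model.gguf -p "Hello"
    nix run .#llama-server -- -m /path/to/model.gguf --port 8080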
llama.cpp/.devops/nix/devshells.nix ADDED
@@ -0,0 +1,52 @@
1
+ { inputs, ... }:
2
+
3
+ {
4
+ perSystem =
5
+ {
6
+ config,
7
+ lib,
8
+ system,
9
+ ...
10
+ }:
11
+ {
12
+ devShells =
13
+ let
14
+ pkgs = import inputs.nixpkgs { inherit system; };
15
+ stdenv = pkgs.stdenv;
16
+ scripts = config.packages.python-scripts;
17
+ in
18
+ lib.pipe (config.packages) [
19
+ (lib.concatMapAttrs (
20
+ name: package: {
21
+ ${name} = pkgs.mkShell {
22
+ name = "${name}";
23
+ inputsFrom = [ package ];
24
+ shellHook = ''
25
+ echo "Entering ${name} devShell"
26
+ '';
27
+ };
28
+ "${name}-extra" =
29
+ if (name == "python-scripts") then
30
+ null
31
+ else
32
+ pkgs.mkShell {
33
+ name = "${name}-extra";
34
+ inputsFrom = [
35
+ package
36
+ scripts
37
+ ];
38
+ # Extra packages that *may* be used by some scripts
39
+ packages = [
40
+ pkgs.python3Packages.tiktoken
41
+ ];
42
+ shellHook = ''
43
+ echo "Entering ${name} devShell"
44
+ addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib stdenv.cc.cc}/lib"
45
+ '';
46
+ };
47
+ }
48
+ ))
49
+ (lib.filterAttrs (name: value: value != null))
50
+ ];
51
+ };
52
+ }
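
The generated devShells mirror the package names, each with an additional "-extra" variant that also pulls in the Python scripts' dependencies. For example (the shell names here are assumptions based on the packages the flake exposes):

    nix develop .#default          # plain build shell for the default package
    nix develop .#default-extra    # same, plus the python-scripts dependencies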
llama.cpp/.devops/nix/docker.nix ADDED
@@ -0,0 +1,37 @@
1
+ {
2
+ lib,
3
+ dockerTools,
4
+ buildEnv,
5
+ llama-cpp,
6
+ interactive ? true,
7
+ coreutils,
8
+ }:
9
+
10
+ # A tar that can be fed into `docker load`:
11
+ #
12
+ # $ nix build .#llamaPackages.docker
13
+ # $ docker load < result
14
+
15
+ # For details and variations cf.
16
+ # - https://nixos.org/manual/nixpkgs/unstable/#ssec-pkgs-dockerTools-buildLayeredImage
17
+ # - https://discourse.nixos.org/t/a-faster-dockertools-buildimage-prototype/16922
18
+ # - https://nixery.dev/
19
+
20
+ # Approximate (compressed) sizes, at the time of writing, are:
21
+ #
22
+ # .#llamaPackages.docker: 125M;
23
+ # .#llamaPackagesCuda.docker: 537M;
24
+ # .#legacyPackages.aarch64-linux.llamaPackagesXavier.docker: 415M.
25
+
26
+ dockerTools.buildLayeredImage {
27
+ name = llama-cpp.pname;
28
+ tag = "latest";
29
+
30
+ contents =
31
+ [ llama-cpp ]
32
+ ++ lib.optionals interactive [
33
+ coreutils
34
+ dockerTools.binSh
35
+ dockerTools.caCertificates
36
+ ];
37
+ }
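
As the comments above note, the image is loaded from the Nix build result; a possible end-to-end sequence (the loaded image name follows the package pname, e.g. llama-cpp-blas:latest, so adjust the final tag accordingly):

    nix build .#llamaPackages.docker
    docker load < result
    docker run --rm <loaded-image> llama-cli --help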
llama.cpp/.devops/nix/jetson-support.nix ADDED
@@ -0,0 +1,39 @@
1
+ { inputs, ... }:
2
+ {
3
+ perSystem =
4
+ {
5
+ config,
6
+ system,
7
+ lib,
8
+ pkgsCuda,
9
+ ...
10
+ }:
11
+ {
12
+ legacyPackages =
13
+ let
14
+ caps.llamaPackagesXavier = "7.2";
15
+ caps.llamaPackagesOrin = "8.7";
16
+ caps.llamaPackagesTX2 = "6.2";
17
+ caps.llamaPackagesNano = "5.3";
18
+
19
+ pkgsFor =
20
+ cap:
21
+ import inputs.nixpkgs {
22
+ inherit system;
23
+ config = {
24
+ cudaSupport = true;
25
+ cudaCapabilities = [ cap ];
26
+ cudaEnableForwardCompat = false;
27
+ inherit (pkgsCuda.config) allowUnfreePredicate;
28
+ };
29
+ };
30
+ in
31
+ builtins.mapAttrs (name: cap: (pkgsFor cap).callPackage ./scope.nix { }) caps;
32
+
33
+ packages = lib.optionalAttrs (system == "aarch64-linux") {
34
+ jetson-xavier = config.legacyPackages.llamaPackagesXavier.llama-cpp;
35
+ jetson-orin = config.legacyPackages.llamaPackagesOrin.llama-cpp;
36
+ jetson-nano = config.legacyPackages.llamaPackagesNano.llama-cpp;
37
+ };
38
+ };
39
+ }
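
On an aarch64-linux builder these extra outputs can be built like any other flake package, e.g. for an Orin board:

    nix build .#jetson-orin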
llama.cpp/.devops/nix/nixpkgs-instances.nix ADDED
@@ -0,0 +1,45 @@
1
+ { inputs, ... }:
2
+ {
3
+ # The _module.args definitions are passed on to modules as arguments. E.g.
4
+ # the module `{ pkgs ... }: { /* config */ }` implicitly uses
5
+ # `_module.args.pkgs` (defined in this case by flake-parts).
6
+ perSystem =
7
+ { system, ... }:
8
+ {
9
+ _module.args = {
10
+ # Note: bringing up https://zimbatm.com/notes/1000-instances-of-nixpkgs
11
+ # again, the below creates several nixpkgs instances which the
12
+ # flake-centric CLI will be forced to evaluate e.g. on `nix flake show`.
13
+ #
14
+ # This is currently "slow" and "expensive", on a certain scale.
15
+ # This also isn't "right" in that this hinders dependency injection at
16
+ # the level of flake inputs. This might get removed in the foreseeable
17
+ # future.
18
+ #
19
+ # Note that you can use these expressions without Nix
20
+ # (`pkgs.callPackage ./devops/nix/scope.nix { }` is the entry point).
21
+
22
+ pkgsCuda = import inputs.nixpkgs {
23
+ inherit system;
24
+ # Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc,
25
+ # and ucx are built with CUDA support)
26
+ config.cudaSupport = true;
27
+ config.allowUnfreePredicate =
28
+ p:
29
+ builtins.all (
30
+ license:
31
+ license.free
32
+ || builtins.elem license.shortName [
33
+ "CUDA EULA"
34
+ "cuDNN EULA"
35
+ ]
36
+ ) (p.meta.licenses or [ p.meta.license ]);
37
+ };
38
+ # Ensure dependencies use ROCm consistently
39
+ pkgsRocm = import inputs.nixpkgs {
40
+ inherit system;
41
+ config.rocmSupport = true;
42
+ };
43
+ };
44
+ };
45
+ }
llama.cpp/.devops/nix/package-gguf-py.nix ADDED
@@ -0,0 +1,36 @@
1
+ {
2
+ lib,
3
+ llamaVersion,
4
+ numpy,
5
+ tqdm,
6
+ sentencepiece,
7
+ pyyaml,
8
+ poetry-core,
9
+ buildPythonPackage,
10
+ pytestCheckHook,
11
+ }:
12
+
13
+ buildPythonPackage {
14
+ pname = "gguf";
15
+ version = llamaVersion;
16
+ pyproject = true;
17
+ nativeBuildInputs = [ poetry-core ];
18
+ propagatedBuildInputs = [
19
+ numpy
20
+ tqdm
21
+ sentencepiece
22
+ pyyaml
23
+ ];
24
+ src = lib.cleanSource ../../gguf-py;
25
+ pythonImportsCheck = [
26
+ "numpy"
27
+ "gguf"
28
+ ];
29
+ nativeCheckInputs = [ pytestCheckHook ];
30
+ doCheck = true;
31
+ meta = with lib; {
32
+ description = "Python package for writing binary files in the GGUF format";
33
+ license = licenses.mit;
34
+ maintainers = [ maintainers.ditsuke ];
35
+ };
36
+ }
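
The resulting Python package is part of the llamaPackages scope, so it can be built (with its pytest suite run as part of the build) in one step; the attribute path below assumes the scope is exposed the same way as in docker.nix above:

    nix build .#llamaPackages.gguf-py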
llama.cpp/.devops/nix/package.nix ADDED
@@ -0,0 +1,248 @@
1
+ {
2
+ lib,
3
+ glibc,
4
+ config,
5
+ stdenv,
6
+ runCommand,
7
+ cmake,
8
+ ninja,
9
+ pkg-config,
10
+ git,
11
+ mpi,
12
+ blas,
13
+ cudaPackages,
14
+ autoAddDriverRunpath,
15
+ darwin,
16
+ rocmPackages,
17
+ vulkan-headers,
18
+ vulkan-loader,
19
+ curl,
20
+ shaderc,
21
+ useBlas ?
22
+ builtins.all (x: !x) [
23
+ useCuda
24
+ useMetalKit
25
+ useRocm
26
+ useVulkan
27
+ ]
28
+ && blas.meta.available,
29
+ useCuda ? config.cudaSupport,
30
+ useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
31
+ # Increases the runtime closure size by ~700M
32
+ useMpi ? false,
33
+ useRocm ? config.rocmSupport,
34
+ rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets,
35
+ enableCurl ? true,
36
+ useVulkan ? false,
37
+ llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
38
+
39
+ # It's necessary to consistently use backendStdenv when building with CUDA support,
40
+ # otherwise we get libstdc++ errors downstream.
41
+ effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
42
+ enableStatic ? effectiveStdenv.hostPlatform.isStatic,
43
+ precompileMetalShaders ? false,
44
+ }:
45
+
46
+ let
47
+ inherit (lib)
48
+ cmakeBool
49
+ cmakeFeature
50
+ optionalAttrs
51
+ optionals
52
+ strings
53
+ ;
54
+
55
+ stdenv = throw "Use effectiveStdenv instead";
56
+
57
+ suffices =
58
+ lib.optionals useBlas [ "BLAS" ]
59
+ ++ lib.optionals useCuda [ "CUDA" ]
60
+ ++ lib.optionals useMetalKit [ "MetalKit" ]
61
+ ++ lib.optionals useMpi [ "MPI" ]
62
+ ++ lib.optionals useRocm [ "ROCm" ]
63
+ ++ lib.optionals useVulkan [ "Vulkan" ];
64
+
65
+ pnameSuffix =
66
+ strings.optionalString (suffices != [ ])
67
+ "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
68
+ descriptionSuffix = strings.optionalString (
69
+ suffices != [ ]
70
+ ) ", accelerated with ${strings.concatStringsSep ", " suffices}";
71
+
72
+ xcrunHost = runCommand "xcrunHost" { } ''
73
+ mkdir -p $out/bin
74
+ ln -s /usr/bin/xcrun $out/bin
75
+ '';
76
+
77
+ # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
78
+ # separately
79
+ darwinBuildInputs =
80
+ with darwin.apple_sdk.frameworks;
81
+ [
82
+ Accelerate
83
+ CoreVideo
84
+ CoreGraphics
85
+ ]
86
+ ++ optionals useMetalKit [ MetalKit ];
87
+
88
+ cudaBuildInputs = with cudaPackages; [
89
+ cuda_cudart
90
+ cuda_cccl # <nv/target>
91
+ libcublas
92
+ ];
93
+
94
+ rocmBuildInputs = with rocmPackages; [
95
+ clr
96
+ hipblas
97
+ rocblas
98
+ ];
99
+
100
+ vulkanBuildInputs = [
101
+ vulkan-headers
102
+ vulkan-loader
103
+ shaderc
104
+ ];
105
+ in
106
+
107
+ effectiveStdenv.mkDerivation (finalAttrs: {
108
+ pname = "llama-cpp${pnameSuffix}";
109
+ version = llamaVersion;
110
+
111
+ # Note: none of the files discarded here are visible in the sandbox or
112
+ # affect the output hash. This also means they can be modified without
113
+ # triggering a rebuild.
114
+ src = lib.cleanSourceWith {
115
+ filter =
116
+ name: type:
117
+ let
118
+ noneOf = builtins.all (x: !x);
119
+ baseName = baseNameOf name;
120
+ in
121
+ noneOf [
122
+ (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
123
+ (lib.hasSuffix ".md" name) # Ignore *.md changes when computing outPaths
124
+ (lib.hasPrefix "." baseName) # Skip hidden files and directories
125
+ (baseName == "flake.lock")
126
+ ];
127
+ src = lib.cleanSource ../../.;
128
+ };
129
+
130
+ postPatch = ''
131
+ substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
132
+ --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
133
+ substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
134
+ --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
135
+ '';
136
+
137
+ # With PR#6015 https://github.com/ggml-org/llama.cpp/pull/6015,
138
+ # `default.metallib` may be compiled with the Metal compiler from Xcode
139
+ # and we need to escape the sandbox on macOS to access the Metal compiler.
140
+ # `xcrun` is used to find the path of the Metal compiler, which is variable
141
+ # and not on $PATH
142
+ # see https://github.com/ggml-org/llama.cpp/pull/6118 for discussion
143
+ __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;
144
+
145
+ nativeBuildInputs =
146
+ [
147
+ cmake
148
+ ninja
149
+ pkg-config
150
+ git
151
+ ]
152
+ ++ optionals useCuda [
153
+ cudaPackages.cuda_nvcc
154
+
155
+ autoAddDriverRunpath
156
+ ]
157
+ ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ]
158
+ ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ];
159
+
160
+ buildInputs =
161
+ optionals effectiveStdenv.isDarwin darwinBuildInputs
162
+ ++ optionals useCuda cudaBuildInputs
163
+ ++ optionals useMpi [ mpi ]
164
+ ++ optionals useRocm rocmBuildInputs
165
+ ++ optionals useBlas [ blas ]
166
+ ++ optionals useVulkan vulkanBuildInputs
167
+ ++ optionals enableCurl [ curl ];
168
+
169
+ cmakeFlags =
170
+ [
171
+ (cmakeBool "LLAMA_BUILD_SERVER" true)
172
+ (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
173
+ (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
174
+ (cmakeBool "LLAMA_CURL" enableCurl)
175
+ (cmakeBool "GGML_NATIVE" false)
176
+ (cmakeBool "GGML_BLAS" useBlas)
177
+ (cmakeBool "GGML_CUDA" useCuda)
178
+ (cmakeBool "GGML_HIP" useRocm)
179
+ (cmakeBool "GGML_METAL" useMetalKit)
180
+ (cmakeBool "GGML_VULKAN" useVulkan)
181
+ (cmakeBool "GGML_STATIC" enableStatic)
182
+ ]
183
+ ++ optionals useCuda [
184
+ (
185
+ with cudaPackages.flags;
186
+ cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
187
+ builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
188
+ )
189
+ )
190
+ ]
191
+ ++ optionals useRocm [
192
+ (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
193
+ (cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmGpuTargets)
194
+ ]
195
+ ++ optionals useMetalKit [
196
+ (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
197
+ (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
198
+ ];
199
+
200
+ # Environment variables needed for ROCm
201
+ env = optionalAttrs useRocm {
202
+ ROCM_PATH = "${rocmPackages.clr}";
203
+ HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
204
+ };
205
+
206
+ # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
207
+ # if they haven't been added yet.
208
+ postInstall = ''
209
+ mkdir -p $out/include
210
+ cp $src/include/llama.h $out/include/
211
+ '';
212
+
213
+ meta = {
214
+ # Configurations we don't want even the CI to evaluate. Results in the
215
+ # "unsupported platform" messages. This is mostly a no-op, because
216
+ # cudaPackages would've refused to evaluate anyway.
217
+ badPlatforms = optionals useCuda lib.platforms.darwin;
218
+
219
+ # Configurations that are known to result in build failures. Can be
220
+ # overridden by importing Nixpkgs with `allowBroken = true`.
221
+ broken = (useMetalKit && !effectiveStdenv.isDarwin);
222
+
223
+ description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
224
+ homepage = "https://github.com/ggml-org/llama.cpp/";
225
+ license = lib.licenses.mit;
226
+
227
+ # Accommodates `nix run` and `lib.getExe`
228
+ mainProgram = "llama-cli";
229
+
230
+ # These people might respond, on the best effort basis, if you ping them
231
+ # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
232
+ # Consider adding yourself to this list if you want to ensure this flake
233
+ # stays maintained and you're willing to invest your time. Do not add
234
+ # other people without their consent. Consider removing people after
235
+ # they've been unreachable for long periods of time.
236
+
237
+ # Note that lib.maintainers is defined in Nixpkgs, but you may just add
238
+ # an attrset following the same format as in
239
+ # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
240
+ maintainers = with lib.maintainers; [
241
+ philiptaron
242
+ SomeoneSerge
243
+ ];
244
+
245
+ # Extend `badPlatforms` instead
246
+ platforms = lib.platforms.all;
247
+ };
248
+ })
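
Each accelerated variant of this derivation is selected through the dedicated nixpkgs instances rather than by flipping the flags manually; a sketch of building the default package and the CUDA variant (the attribute names are assumptions based on the scopes referenced elsewhere in this directory):

    nix build .#llamaPackages.llama-cpp
    nix build .#llamaPackagesCuda.llama-cpp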
llama.cpp/.devops/nix/python-scripts.nix ADDED
@@ -0,0 +1,66 @@
1
+ {
2
+ lib,
3
+ stdenv,
4
+ buildPythonPackage,
5
+ poetry-core,
6
+ mkShell,
7
+ python3Packages,
8
+ gguf-py,
9
+ }@inputs:
10
+
11
+ let
12
+ llama-python-deps = with python3Packages; [
13
+ numpy
14
+ sentencepiece
15
+ transformers
16
+ protobuf
17
+ torchWithoutCuda
18
+ gguf-py
19
+ tqdm
20
+
21
+ # for scripts/compare-llama-bench.py
22
+ gitpython
23
+ tabulate
24
+
25
+ # for examples/pydantic-models-to-grammar-examples.py
26
+ docstring-parser
27
+ pydantic
28
+
29
+ ];
30
+
31
+ llama-python-test-deps = with python3Packages; [
32
+ # Server bench
33
+ matplotlib
34
+
35
+ # server tests
36
+ openai
37
+ pytest
38
+ prometheus-client
39
+ ];
40
+ in
41
+
42
+ buildPythonPackage ({
43
+ pname = "llama-scripts";
44
+ version = "0.0.0";
45
+ pyproject = true;
46
+
47
+ # NOTE: The files filtered out here are not visible in the build sandbox, nor
48
+ # do they affect the output hash. They can be modified without triggering a rebuild.
49
+ src = lib.cleanSourceWith {
50
+ filter =
51
+ name: type:
52
+ let
53
+ any = builtins.any (x: x);
54
+ baseName = builtins.baseNameOf name;
55
+ in
56
+ any [
57
+ (lib.hasSuffix ".py" name)
58
+ (baseName == "README.md")
59
+ (baseName == "pyproject.toml")
60
+ ];
61
+ src = lib.cleanSource ../../.;
62
+ };
63
+ nativeBuildInputs = [ poetry-core ];
64
+ nativeCheckInputs = llama-python-test-deps;
65
+ dependencies = llama-python-deps;
66
+ })
llama.cpp/.devops/nix/scope.nix ADDED
@@ -0,0 +1,41 @@
1
+ {
2
+ lib,
3
+ newScope,
4
+ python3,
5
+ llamaVersion ? "0.0.0",
6
+ }:
7
+
8
+ let
9
+ pythonPackages = python3.pkgs;
10
+ buildPythonPackage = pythonPackages.buildPythonPackage;
11
+ numpy = pythonPackages.numpy;
12
+ tqdm = pythonPackages.tqdm;
13
+ sentencepiece = pythonPackages.sentencepiece;
14
+ pyyaml = pythonPackages.pyyaml;
15
+ poetry-core = pythonPackages.poetry-core;
16
+ pytestCheckHook = pythonPackages.pytestCheckHook;
17
+ in
18
+
19
+ # We're using `makeScope` instead of just writing out an attrset
20
+ # because it allows users to apply overlays later using `overrideScope'`.
21
+ # Cf. https://noogle.dev/f/lib/makeScope
22
+
23
+ lib.makeScope newScope (self: {
24
+ inherit llamaVersion;
25
+ gguf-py = self.callPackage ./package-gguf-py.nix {
26
+ inherit
27
+ buildPythonPackage
28
+ numpy
29
+ tqdm
30
+ sentencepiece
31
+ poetry-core
32
+ pyyaml
33
+ pytestCheckHook
34
+ ;
35
+ };
36
+ python-scripts = self.callPackage ./python-scripts.nix { inherit buildPythonPackage poetry-core; };
37
+ llama-cpp = self.callPackage ./package.nix { };
38
+ docker = self.callPackage ./docker.nix { };
39
+ docker-min = self.callPackage ./docker.nix { interactive = false; };
40
+ sif = self.callPackage ./sif.nix { };
41
+ })
llama.cpp/.devops/nix/sif.nix ADDED
@@ -0,0 +1,27 @@
1
+ {
2
+ lib,
3
+ singularity-tools,
4
+ llama-cpp,
5
+ bashInteractive,
6
+ interactive ? false,
7
+ }:
8
+
9
+ let
10
+ optionalInt = cond: x: if cond then x else 0;
11
+ in
12
+ singularity-tools.buildImage rec {
13
+ inherit (llama-cpp) name;
14
+ contents = [ llama-cpp ] ++ lib.optionals interactive [ bashInteractive ];
15
+
16
+ # These are excessive (but safe) for most variants. Building singularity
17
+ # images requires superuser privileges, so we build them inside a VM in a
18
+ # writable image of pre-determined size.
19
+ #
20
+ # ROCm is currently affected by https://github.com/NixOS/nixpkgs/issues/276846
21
+ #
22
+ # Expected image sizes:
23
+ # - cpu/blas: 150M,
24
+ # - cuda, all gencodes: 560M,
25
+ diskSize = 4096 + optionalInt llama-cpp.useRocm 16384;
26
+ memSize = diskSize;
27
+ }
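
The Singularity/Apptainer image is built the same way as the Docker one; a sketch (the exact location of the image file under ./result depends on singularity-tools, so the path is left as a placeholder):

    nix build .#llamaPackages.sif
    apptainer exec <path-to-built-image> llama-cli --help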
llama.cpp/.devops/rocm.Dockerfile ADDED
@@ -0,0 +1,113 @@
1
+ ARG UBUNTU_VERSION=24.04
2
+
3
+ # This needs to generally match the container host's environment.
4
+ ARG ROCM_VERSION=6.4
5
+ ARG AMDGPU_VERSION=6.4
6
+
7
+ # Target the ROCm build image
8
+ ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
9
+
10
+ ### Build image
11
+ FROM ${BASE_ROCM_DEV_CONTAINER} AS build
12
+
13
+ # Unless otherwise specified, we make a fat build.
14
+ # List from https://github.com/ggml-org/llama.cpp/pull/1087#issuecomment-1682807878
15
+ # This is mostly tied to rocBLAS supported archs.
16
+ # gfx803, gfx900, gfx1032, gfx1101, gfx1102: not officially supported
17
+ # gfx906 is deprecated
18
+ # Check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html
19
+
20
+ ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102'
21
+ #ARG ROCM_DOCKER_ARCH=gfx1100
22
+
23
+ # Set the AMD GPU architectures to build for
24
+ ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
25
+ # Enable ROCm
26
+ # ENV CC=/opt/rocm/llvm/bin/clang
27
+ # ENV CXX=/opt/rocm/llvm/bin/clang++
28
+
29
+ RUN apt-get update \
30
+ && apt-get install -y \
31
+ build-essential \
32
+ cmake \
33
+ git \
34
+ libcurl4-openssl-dev \
35
+ curl \
36
+ libgomp1
37
+
38
+ WORKDIR /app
39
+
40
+ COPY . .
41
+
42
+ RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
43
+ cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DCMAKE_BUILD_TYPE=Release -DLLAMA_BUILD_TESTS=OFF \
44
+ && cmake --build build --config Release -j$(nproc)
45
+
46
+ RUN mkdir -p /app/lib \
47
+ && find build -name "*.so" -exec cp {} /app/lib \;
48
+
49
+ RUN mkdir -p /app/full \
50
+ && cp build/bin/* /app/full \
51
+ && cp *.py /app/full \
52
+ && cp -r gguf-py /app/full \
53
+ && cp -r requirements /app/full \
54
+ && cp requirements.txt /app/full \
55
+ && cp .devops/tools.sh /app/full/tools.sh
56
+
57
+ ## Base image
58
+ FROM ${BASE_ROCM_DEV_CONTAINER} AS base
59
+
60
+ RUN apt-get update \
61
+ && apt-get install -y libgomp1 curl\
62
+ && apt autoremove -y \
63
+ && apt clean -y \
64
+ && rm -rf /tmp/* /var/tmp/* \
65
+ && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
66
+ && find /var/cache -type f -delete
67
+
68
+ COPY --from=build /app/lib/ /app
69
+
70
+ ### Full
71
+ FROM base AS full
72
+
73
+ COPY --from=build /app/full /app
74
+
75
+ WORKDIR /app
76
+
77
+ RUN apt-get update \
78
+ && apt-get install -y \
79
+ git \
80
+ python3-pip \
81
+ python3 \
82
+ python3-wheel\
83
+ && pip install --break-system-packages --upgrade setuptools \
84
+ && pip install --break-system-packages -r requirements.txt \
85
+ && apt autoremove -y \
86
+ && apt clean -y \
87
+ && rm -rf /tmp/* /var/tmp/* \
88
+ && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
89
+ && find /var/cache -type f -delete
90
+
91
+ ENTRYPOINT ["/app/tools.sh"]
92
+
93
+ ### Light, CLI only
94
+ FROM base AS light
95
+
96
+ COPY --from=build /app/full/llama-cli /app
97
+
98
+ WORKDIR /app
99
+
100
+ ENTRYPOINT [ "/app/llama-cli" ]
101
+
102
+ ### Server, Server only
103
+ FROM base AS server
104
+
105
+ ENV LLAMA_ARG_HOST=0.0.0.0
106
+
107
+ COPY --from=build /app/full/llama-server /app
108
+
109
+ WORKDIR /app
110
+
111
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
112
+
113
+ ENTRYPOINT [ "/app/llama-server" ]
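
Building the fat multi-arch image takes a long time, so for local use it is common to restrict ROCM_DOCKER_ARCH to your own GPU. A sketch (gfx1100 and the paths below are examples; /dev/kfd and /dev/dri must be passed through for ROCm to see the GPU):

    docker build -f .devops/rocm.Dockerfile --build-arg ROCM_DOCKER_ARCH=gfx1100 --target light -t llama-cpp:light-rocm .
    docker run --device /dev/kfd --device /dev/dri -v /path/to/models:/models \
        llama-cpp:light-rocm -m /models/model.gguf -p "Hello" -n 64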
llama.cpp/.devops/tools.sh ADDED
@@ -0,0 +1,49 @@
1
+ #!/usr/bin/env bash
2
+ set -e
3
+
4
+ # Read the first argument into a variable
5
+ arg1="$1"
6
+
7
+ # Shift the arguments to remove the first one
8
+ shift
9
+
10
+ if [[ "$arg1" == '--convert' || "$arg1" == '-c' ]]; then
11
+ exec python3 ./convert_hf_to_gguf.py "$@"
12
+ elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
13
+ exec ./llama-quantize "$@"
14
+ elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
15
+ exec ./llama-cli "$@"
16
+ elif [[ "$arg1" == '--bench' || "$arg1" == '-b' ]]; then
17
+ exec ./llama-bench "$@"
18
+ elif [[ "$arg1" == '--perplexity' || "$arg1" == '-p' ]]; then
19
+ exec ./llama-perplexity "$@"
20
+ elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
21
+ echo "Quantizing f16 GGML models to q4_0..."
22
+ for i in $(ls $1/$2/ggml-model-f16.bin*); do
23
+ if [ -f "${i/f16/q4_0}" ]; then
24
+ echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
25
+ else
26
+ echo "Converting PTH to GGML: $i into ${i/f16/q4_0}..."
27
+ ./llama-quantize "$i" "${i/f16/q4_0}" q4_0  # no exec here, so every matching model is processed
28
+ fi
29
+ done
30
+ elif [[ "$arg1" == '--server' || "$arg1" == '-s' ]]; then
31
+ exec ./llama-server "$@"
32
+ else
33
+ echo "Unknown command: $arg1"
34
+ echo "Available commands: "
35
+ echo " --run (-r): Run a model previously converted into ggml"
36
+ echo " ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -n 512"
37
+ echo " --bench (-b): Benchmark the performance of the inference for various parameters."
38
+ echo " ex: -m model.gguf"
39
+ echo " --perplexity (-p): Measure the perplexity of a model over a given text."
40
+ echo " ex: -m model.gguf -f file.txt"
41
+ echo " --convert (-c): Convert a llama model into ggml"
42
+ echo " ex: --outtype f16 \"/models/7B/\" "
43
+ echo " --quantize (-q): Quantize a ggml model"
44
+ echo " ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2"
45
+ echo " --all-in-one (-a): Execute --convert & --quantize"
46
+ echo " ex: \"/models/\" 7B"
47
+ echo " --server (-s): Run a model on the server"
48
+ echo " ex: -m /models/7B/ggml-model-q4_0.bin -c 2048 -ngl 43 -mg 1 --port 8080"
49
+ fi
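
This script is the entrypoint of the "full" image variants above, so its subcommands are what you pass to docker run. For example (the image name and model paths are placeholders):

    docker run -v /path/to/models:/models <full-image> --convert --outtype f16 /models/7B/
    docker run -v /path/to/models:/models <full-image> --run -m /models/7B/ggml-model-q4_0.gguf -p "Hello" -n 64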
llama.cpp/.devops/vulkan.Dockerfile ADDED
@@ -0,0 +1,89 @@
1
+ ARG UBUNTU_VERSION=24.04
2
+
3
+ FROM ubuntu:$UBUNTU_VERSION AS build
4
+
5
+ # Install build tools
6
+ RUN apt update && apt install -y git build-essential cmake wget
7
+
8
+ # Install Vulkan SDK and cURL
9
+ RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
10
+ wget -qO /etc/apt/sources.list.d/lunarg-vulkan-noble.list https://packages.lunarg.com/vulkan/lunarg-vulkan-noble.list && \
11
+ apt update -y && \
12
+ apt-get install -y vulkan-sdk libcurl4-openssl-dev curl
13
+
14
+ # Build it
15
+ WORKDIR /app
16
+
17
+ COPY . .
18
+
19
+ RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON && \
20
+ cmake --build build --config Release -j$(nproc)
21
+
22
+ RUN mkdir -p /app/lib && \
23
+ find build -name "*.so" -exec cp {} /app/lib \;
24
+
25
+ RUN mkdir -p /app/full \
26
+ && cp build/bin/* /app/full \
27
+ && cp *.py /app/full \
28
+ && cp -r gguf-py /app/full \
29
+ && cp -r requirements /app/full \
30
+ && cp requirements.txt /app/full \
31
+ && cp .devops/tools.sh /app/full/tools.sh
32
+
33
+ ## Base image
34
+ FROM ubuntu:$UBUNTU_VERSION AS base
35
+
36
+ RUN apt-get update \
37
+ && apt-get install -y libgomp1 curl libvulkan-dev \
38
+ && apt autoremove -y \
39
+ && apt clean -y \
40
+ && rm -rf /tmp/* /var/tmp/* \
41
+ && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
42
+ && find /var/cache -type f -delete
43
+
44
+ COPY --from=build /app/lib/ /app
45
+
46
+ ### Full
47
+ FROM base AS full
48
+
49
+ COPY --from=build /app/full /app
50
+
51
+ WORKDIR /app
52
+
53
+ RUN apt-get update \
54
+ && apt-get install -y \
55
+ git \
56
+ python3 \
57
+ python3-pip \
58
+ python3-wheel \
59
+ && pip install --break-system-packages --upgrade setuptools \
60
+ && pip install --break-system-packages -r requirements.txt \
61
+ && apt autoremove -y \
62
+ && apt clean -y \
63
+ && rm -rf /tmp/* /var/tmp/* \
64
+ && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
65
+ && find /var/cache -type f -delete
66
+
67
+ ENTRYPOINT ["/app/tools.sh"]
68
+
69
+ ### Light, CLI only
70
+ FROM base AS light
71
+
72
+ COPY --from=build /app/full/llama-cli /app
73
+
74
+ WORKDIR /app
75
+
76
+ ENTRYPOINT [ "/app/llama-cli" ]
77
+
78
+ ### Server, Server only
79
+ FROM base AS server
80
+
81
+ ENV LLAMA_ARG_HOST=0.0.0.0
82
+
83
+ COPY --from=build /app/full/llama-server /app
84
+
85
+ WORKDIR /app
86
+
87
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
88
+
89
+ ENTRYPOINT [ "/app/llama-server" ]
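
As with the other Dockerfiles, each stage can be built separately; a sketch for the Vulkan server image (the tag and device flags are examples, and GPU passthrough depends on your driver setup):

    docker build -f .devops/vulkan.Dockerfile --target server -t llama-cpp:server-vulkan .
    docker run -p 8080:8080 --device /dev/dri -v /path/to/models:/models llama-cpp:server-vulkan -m /models/model.gguf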
llama.cpp/.dockerignore ADDED
@@ -0,0 +1,20 @@
1
+ *.o
2
+ *.a
3
+ .cache/
4
+ # Do not ignore .git directory, otherwise the reported build number will always be 0
5
+ .github/
6
+ .gitignore
7
+ .vs/
8
+ .vscode/
9
+ .DS_Store
10
+
11
+ build*/
12
+
13
+ models/*
14
+
15
+ /llama-cli
16
+ /llama-quantize
17
+
18
+ arm_neon.h
19
+ compile_commands.json
20
+ Dockerfile
llama.cpp/.ecrc ADDED
@@ -0,0 +1,6 @@
1
+ {
2
+ "Exclude": ["^\\.gitmodules$", "stb_image\\.h"],
3
+ "Disable": {
4
+ "IndentSize": true
5
+ }
6
+ }
llama.cpp/.editorconfig ADDED
@@ -0,0 +1,54 @@
1
+ # https://EditorConfig.org
2
+
3
+ # Top-most EditorConfig file
4
+ root = true
5
+
6
+ # Unix-style newlines with a newline ending every file, utf-8 charset
7
+ [*]
8
+ end_of_line = lf
9
+ insert_final_newline = true
10
+ trim_trailing_whitespace = true
11
+ charset = utf-8
12
+ indent_style = space
13
+ indent_size = 4
14
+
15
+ [Makefile]
16
+ indent_style = tab
17
+
18
+ [scripts/*.mk]
19
+ indent_style = tab
20
+
21
+ [prompts/*.txt]
22
+ insert_final_newline = unset
23
+
24
+ [tools/server/public/*]
25
+ indent_size = 2
26
+
27
+ [tools/server/public/deps_*]
28
+ trim_trailing_whitespace = unset
29
+ indent_style = unset
30
+ indent_size = unset
31
+
32
+ [tools/server/deps_*]
33
+ trim_trailing_whitespace = unset
34
+ indent_style = unset
35
+ indent_size = unset
36
+
37
+ [examples/llama.swiftui/llama.swiftui.xcodeproj/*]
38
+ indent_style = tab
39
+
40
+ [tools/cvector-generator/*.txt]
41
+ trim_trailing_whitespace = unset
42
+ insert_final_newline = unset
43
+
44
+ [models/templates/*.jinja]
45
+ indent_style = unset
46
+ indent_size = unset
47
+ end_of_line = unset
48
+ charset = unset
49
+ trim_trailing_whitespace = unset
50
+ insert_final_newline = unset
51
+
52
+ [vendor/miniaudio/miniaudio.h]
53
+ trim_trailing_whitespace = unset
54
+ insert_final_newline = unset
llama.cpp/.flake8 ADDED
@@ -0,0 +1,18 @@
1
+ [flake8]
2
+ max-line-length = 125
3
+ ignore = E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503
4
+ exclude =
5
+ # Do not traverse examples and tools
6
+ examples,
7
+ tools,
8
+ # Do not include package initializers
9
+ __init__.py,
10
+ # No need to traverse our git directory
11
+ .git,
12
+ # There's no value in checking cache directories
13
+ __pycache__,
14
+ # No need to include the build path
15
+ build,
16
+ # This contains builds that we don't want to check
17
+ dist # This is generated with `python build .` for package releases
18
+ # max-complexity = 10
llama.cpp/.github/ISSUE_TEMPLATE/010-bug-compilation.yml ADDED
@@ -0,0 +1,87 @@
1
+ name: Bug (compilation)
2
+ description: Something goes wrong when trying to compile llama.cpp.
3
+ title: "Compile bug: "
4
+ labels: ["bug-unconfirmed", "compilation"]
5
+ body:
6
+ - type: markdown
7
+ attributes:
8
+ value: >
9
+ Thanks for taking the time to fill out this bug report!
10
+ This issue template is intended for bug reports where the compilation of llama.cpp fails.
11
+ Before opening an issue, please confirm that the compilation still fails with `-DGGML_CCACHE=OFF`.
12
+ If the compilation succeeds with ccache disabled you should be able to permanently fix the issue
13
+ by clearing `~/.cache/ccache` (on Linux).
14
+ - type: textarea
15
+ id: commit
16
+ attributes:
17
+ label: Git commit
18
+ description: Which commit are you trying to compile?
19
+ placeholder: |
20
+ $git rev-parse HEAD
21
+ 84a07a17b1b08cf2b9747c633a2372782848a27f
22
+ validations:
23
+ required: true
24
+ - type: dropdown
25
+ id: operating-system
26
+ attributes:
27
+ label: Operating systems
28
+ description: Which operating systems do you know to be affected?
29
+ multiple: true
30
+ options:
31
+ - Linux
32
+ - Mac
33
+ - Windows
34
+ - BSD
35
+ - Other? (Please let us know in description)
36
+ validations:
37
+ required: true
38
+ - type: dropdown
39
+ id: backends
40
+ attributes:
41
+ label: GGML backends
42
+ description: Which GGML backends do you know to be affected?
43
+ options: [AMX, BLAS, CPU, CUDA, HIP, Metal, Musa, RPC, SYCL, Vulkan, OpenCL]
44
+ multiple: true
45
+ validations:
46
+ required: true
47
+ - type: textarea
48
+ id: info
49
+ attributes:
50
+ label: Problem description & steps to reproduce
51
+ description: >
52
+ Please give us a summary of the problem and tell us how to reproduce it.
53
+ If you can narrow down the bug to specific compile flags, that information would be very much appreciated by us.
54
+ placeholder: >
55
+ I'm trying to compile llama.cpp with CUDA support on a fresh install of Ubuntu and get error XY.
56
+ Here are the exact commands that I used: ...
57
+ validations:
58
+ required: true
59
+ - type: textarea
60
+ id: first_bad_commit
61
+ attributes:
62
+ label: First Bad Commit
63
+ description: >
64
+ If the bug was not present on an earlier version: when did it start appearing?
65
+ If possible, please do a git bisect and identify the exact commit that introduced the bug.
66
+ validations:
67
+ required: false
68
+ - type: textarea
69
+ id: command
70
+ attributes:
71
+ label: Compile command
72
+ description: >
73
+ Please provide the exact command you used to compile llama.cpp. For example: `cmake -B ...`.
74
+ This will be automatically formatted into code, so no need for backticks.
75
+ render: shell
76
+ validations:
77
+ required: true
78
+ - type: textarea
79
+ id: logs
80
+ attributes:
81
+ label: Relevant log output
82
+ description: >
83
+ Please copy and paste any relevant log output, including any generated text.
84
+ This will be automatically formatted into code, so no need for backticks.
85
+ render: shell
86
+ validations:
87
+ required: true
llama.cpp/.github/ISSUE_TEMPLATE/011-bug-results.yml ADDED
@@ -0,0 +1,101 @@
1
+ name: Bug (model use)
2
+ description: Something goes wrong when using a model (in general, not specific to a single llama.cpp module).
3
+ title: "Eval bug: "
4
+ labels: ["bug-unconfirmed", "model evaluation"]
5
+ body:
6
+ - type: markdown
7
+ attributes:
8
+ value: >
9
+ Thanks for taking the time to fill out this bug report!
10
+ This issue template is intended for bug reports where the model evaluation results
11
+ (i.e. the generated text) are incorrect or llama.cpp crashes during model evaluation.
12
+ If you encountered the issue while using an external UI (e.g. ollama),
13
+ please reproduce your issue using one of the examples/binaries in this repository.
14
+ The `llama-cli` binary can be used for simple and reproducible model inference.
15
+ - type: textarea
16
+ id: version
17
+ attributes:
18
+ label: Name and Version
19
+ description: Which version of our software are you running? (use `--version` to get a version string)
20
+ placeholder: |
21
+ $./llama-cli --version
22
+ version: 2999 (42b4109e)
23
+ built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
24
+ validations:
25
+ required: true
26
+ - type: dropdown
27
+ id: operating-system
28
+ attributes:
29
+ label: Operating systems
30
+ description: Which operating systems do you know to be affected?
31
+ multiple: true
32
+ options:
33
+ - Linux
34
+ - Mac
35
+ - Windows
36
+ - BSD
37
+ - Other? (Please let us know in description)
38
+ validations:
39
+ required: true
40
+ - type: dropdown
41
+ id: backends
42
+ attributes:
43
+ label: GGML backends
44
+ description: Which GGML backends do you know to be affected?
45
+ options: [AMX, BLAS, CPU, CUDA, HIP, Metal, Musa, RPC, SYCL, Vulkan, OpenCL]
46
+ multiple: true
47
+ validations:
48
+ required: true
49
+ - type: textarea
50
+ id: hardware
51
+ attributes:
52
+ label: Hardware
53
+ description: Which CPUs/GPUs are you using?
54
+ placeholder: >
55
+ e.g. Ryzen 5950X + 2x RTX 4090
56
+ validations:
57
+ required: true
58
+ - type: textarea
59
+ id: model
60
+ attributes:
61
+ label: Models
62
+ description: >
63
+ Which model(s) at which quantization were you using when encountering the bug?
64
+ If you downloaded a GGUF file off of Huggingface, please provide a link.
65
+ placeholder: >
66
+ e.g. Meta LLaMA 3.1 Instruct 8b q4_K_M
67
+ validations:
68
+ required: false
69
+ - type: textarea
70
+ id: info
71
+ attributes:
72
+ label: Problem description & steps to reproduce
73
+ description: >
74
+ Please give us a summary of the problem and tell us how to reproduce it.
75
+ If you can narrow down the bug to specific hardware, compile flags, or command line arguments,
76
+ that information would be very much appreciated by us.
77
+ placeholder: >
78
+ e.g. when I run llama-cli with -ngl 99 I get garbled outputs.
79
+ When I use -ngl 0 it works correctly.
80
+ Here are the exact commands that I used: ...
81
+ validations:
82
+ required: true
83
+ - type: textarea
84
+ id: first_bad_commit
85
+ attributes:
86
+ label: First Bad Commit
87
+ description: >
88
+ If the bug was not present on an earlier version: when did it start appearing?
89
+ If possible, please do a git bisect and identify the exact commit that introduced the bug.
90
+ validations:
91
+ required: false
92
+ - type: textarea
93
+ id: logs
94
+ attributes:
95
+ label: Relevant log output
96
+ description: >
97
+ Please copy and paste any relevant log output, including the command that you entered and any generated text.
98
+ This will be automatically formatted into code, so no need for backticks.
99
+ render: shell
100
+ validations:
101
+ required: true
llama.cpp/.github/ISSUE_TEMPLATE/019-bug-misc.yml ADDED
@@ -0,0 +1,91 @@
1
+ name: Bug (misc.)
2
+ description: Something is not working the way it should (and it's not covered by any of the above cases).
3
+ title: "Misc. bug: "
4
+ labels: ["bug-unconfirmed"]
5
+ body:
6
+ - type: markdown
7
+ attributes:
8
+ value: >
9
+ Thanks for taking the time to fill out this bug report!
10
+ This issue template is intended for miscellaneous bugs that don't fit into any other category.
11
+ If you encountered the issue while using an external UI (e.g. ollama),
12
+ please reproduce your issue using one of the examples/binaries in this repository.
13
+ - type: textarea
14
+ id: version
15
+ attributes:
16
+ label: Name and Version
17
+ description: Which version of our software is affected? (You can use `--version` to get a version string.)
18
+ placeholder: |
19
+ $./llama-cli --version
20
+ version: 2999 (42b4109e)
21
+ built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
22
+ validations:
23
+ required: true
24
+ - type: dropdown
25
+ id: operating-system
26
+ attributes:
27
+ label: Operating systems
28
+ description: Which operating systems do you know to be affected?
29
+ multiple: true
30
+ options:
31
+ - Linux
32
+ - Mac
33
+ - Windows
34
+ - BSD
35
+ - Other? (Please let us know in description)
36
+ validations:
37
+ required: false
38
+ - type: dropdown
39
+ id: module
40
+ attributes:
41
+ label: Which llama.cpp modules do you know to be affected?
42
+ multiple: true
43
+ options:
44
+ - Documentation/Github
45
+ - libllama (core library)
46
+ - llama-cli
47
+ - llama-server
48
+ - llama-bench
49
+ - llama-quantize
50
+ - Python/Bash scripts
51
+ - Test code
52
+ - Other (Please specify in the next section)
53
+ validations:
54
+ required: false
55
+ - type: textarea
56
+ id: command
57
+ attributes:
58
+ label: Command line
59
+ description: >
60
+ Please provide the exact commands you entered, if applicable. For example: `llama-server -m ... -c ...`, `llama-cli -m ...`, etc.
61
+ This will be automatically formatted into code, so no need for backticks.
62
+ render: shell
63
+ validations:
64
+ required: false
65
+ - type: textarea
66
+ id: info
67
+ attributes:
68
+ label: Problem description & steps to reproduce
69
+ description: >
70
+ Please give us a summary of the problem and tell us how to reproduce it (if applicable).
71
+ validations:
72
+ required: true
73
+ - type: textarea
74
+ id: first_bad_commit
75
+ attributes:
76
+ label: First Bad Commit
77
+ description: >
78
+ If the bug was not present on an earlier version and it's not trivial to track down: when did it start appearing?
79
+ If possible, please do a git bisect and identify the exact commit that introduced the bug.
80
+ validations:
81
+ required: false
82
+ - type: textarea
83
+ id: logs
84
+ attributes:
85
+ label: Relevant log output
86
+ description: >
87
+ If applicable, please copy and paste any relevant log output, including any generated text.
88
+ This will be automatically formatted into code, so no need for backticks.
89
+ render: shell
90
+ validations:
91
+ required: false
llama.cpp/.github/ISSUE_TEMPLATE/020-enhancement.yml ADDED
@@ -0,0 +1,51 @@
1
+ name: Enhancement
2
+ description: Used to request enhancements for llama.cpp.
3
+ title: "Feature Request: "
4
+ labels: ["enhancement"]
5
+ body:
6
+ - type: markdown
7
+ attributes:
8
+ value: |
9
+ [Please post your idea first in a Discussion if there is not yet a consensus for this enhancement request. This will help to keep this issue tracker focused on enhancements that the community has agreed need to be implemented.](https://github.com/ggml-org/llama.cpp/discussions/categories/ideas)
10
+
11
+ - type: checkboxes
12
+ id: prerequisites
13
+ attributes:
14
+ label: Prerequisites
15
+ description: Please confirm the following before submitting your enhancement request.
16
+ options:
17
+ - label: I am running the latest code. Mention the version if possible as well.
18
+ required: true
19
+ - label: I carefully followed the [README.md](https://github.com/ggml-org/llama.cpp/blob/master/README.md).
20
+ required: true
21
+ - label: I searched using keywords relevant to my issue to make sure that I am creating a new issue that is not already open (or closed).
22
+ required: true
23
+ - label: I reviewed the [Discussions](https://github.com/ggml-org/llama.cpp/discussions), and have a new and useful enhancement to share.
24
+ required: true
25
+
26
+ - type: textarea
27
+ id: feature-description
28
+ attributes:
29
+ label: Feature Description
30
+ description: Please provide a detailed written description of what you were trying to do, and what you expected `llama.cpp` to do as an enhancement.
31
+ placeholder: Detailed description of the enhancement
32
+ validations:
33
+ required: true
34
+
35
+ - type: textarea
36
+ id: motivation
37
+ attributes:
38
+ label: Motivation
39
+ description: Please provide a detailed written description of reasons why this feature is necessary and how it is useful to `llama.cpp` users.
40
+ placeholder: Explanation of why this feature is needed and its benefits
41
+ validations:
42
+ required: true
43
+
44
+ - type: textarea
45
+ id: possible-implementation
46
+ attributes:
47
+ label: Possible Implementation
48
+ description: If you have an idea as to how it can be implemented, please write a detailed description. Feel free to give links to external sources or share visuals that might be helpful to understand the details better.
49
+ placeholder: Detailed description of potential implementation
50
+ validations:
51
+ required: false
llama.cpp/.github/ISSUE_TEMPLATE/030-research.yml ADDED
@@ -0,0 +1,52 @@
1
+ name: Research
2
+ description: Track new technical research area.
3
+ title: "Research: "
4
+ labels: ["research 🔬"]
5
+ body:
6
+ - type: markdown
7
+ attributes:
8
+ value: |
9
+ Don't forget to check for any [duplicate research issue tickets](https://github.com/ggml-org/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3A%22research+%F0%9F%94%AC%22)
10
+
11
+ - type: checkboxes
12
+ id: research-stage
13
+ attributes:
14
+ label: Research Stage
15
+ description: Track general state of this research ticket
16
+ options:
17
+ - label: Background Research (Let's try to avoid reinventing the wheel)
18
+ - label: Hypothesis Formed (How do you think this will work and what will its effect be?)
19
+ - label: Strategy / Implementation Forming
20
+ - label: Analysis of results
21
+ - label: Debrief / Documentation (So people in the future can learn from us)
22
+
23
+ - type: textarea
24
+ id: background
25
+ attributes:
26
+ label: Previous existing literature and research
27
+ description: What's the current state of the art and what's the motivation for this research?
28
+
29
+ - type: textarea
30
+ id: hypothesis
31
+ attributes:
32
+ label: Hypothesis
33
+ description: How do you think this will work and what will its effect be?
34
+
35
+ - type: textarea
36
+ id: implementation
37
+ attributes:
38
+ label: Implementation
39
+ description: Got an approach? e.g. a PR ready to go?
40
+
41
+ - type: textarea
42
+ id: analysis
43
+ attributes:
44
+ label: Analysis
45
+ description: How does the proposed implementation behave?
46
+
47
+ - type: textarea
48
+ id: logs
49
+ attributes:
50
+ label: Relevant log output
51
+ description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
52
+ render: shell
llama.cpp/.github/ISSUE_TEMPLATE/040-refactor.yml ADDED
@@ -0,0 +1,28 @@
1
+ name: Refactor (Maintainers)
2
+ description: Used to track refactoring opportunities.
3
+ title: "Refactor: "
4
+ labels: ["refactor"]
5
+ body:
6
+ - type: markdown
7
+ attributes:
8
+ value: |
9
+ Don't forget to [check for existing refactor issue tickets](https://github.com/ggml-org/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3Arefactoring) in case it's already covered.
10
+ You may also want to check the [pull request refactor label](https://github.com/ggml-org/llama.cpp/pulls?q=is%3Aopen+is%3Apr+label%3Arefactoring) for duplicates.
11
+
12
+ - type: textarea
13
+ id: background-description
14
+ attributes:
15
+ label: Background Description
16
+ description: Please provide a detailed written description of the pain points you are trying to solve.
17
+ placeholder: Detailed description behind your motivation to request refactor
18
+ validations:
19
+ required: true
20
+
21
+ - type: textarea
22
+ id: possible-approaches
23
+ attributes:
24
+ label: Possible Refactor Approaches
25
+ description: If you have some idea of possible approaches to solve this problem. You may want to make it a todo list.
26
+ placeholder: Your idea of possible refactoring opportunity/approaches
27
+ validations:
28
+ required: false
llama.cpp/.github/ISSUE_TEMPLATE/config.yml ADDED
@@ -0,0 +1,11 @@
1
+ blank_issues_enabled: true
2
+ contact_links:
3
+ - name: Got an idea?
4
+ url: https://github.com/ggml-org/llama.cpp/discussions/categories/ideas
5
+ about: Pop it there. It may then become an enhancement ticket.
6
+ - name: Got a question?
7
+ url: https://github.com/ggml-org/llama.cpp/discussions/categories/q-a
8
+ about: Ask a question there!
9
+ - name: Want to contribute?
10
+ url: https://github.com/ggml-org/llama.cpp/wiki/contribute
11
+ about: Head to the contribution guide page of the wiki for areas you can help with
llama.cpp/.github/actions/get-tag-name/action.yml ADDED
@@ -0,0 +1,22 @@
1
+ name: "Determine tag name"
2
+ description: "Determine the tag name to use for a release"
3
+ outputs:
4
+ name:
5
+ description: "The name of the tag"
6
+ value: ${{ steps.tag.outputs.name }}
7
+
8
+ runs:
9
+ using: "composite"
10
+ steps:
11
+ - name: Determine tag name
12
+ id: tag
13
+ shell: bash
14
+ run: |
15
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
16
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
17
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
18
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
19
+ else
20
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
21
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
22
+ fi
llama.cpp/.github/actions/windows-setup-cuda/action.yml ADDED
@@ -0,0 +1,67 @@
1
+ name: "Windows - Setup CUDA Toolkit"
2
+ description: "Setup CUDA Toolkit for Windows"
3
+ inputs:
4
+ cuda_version:
5
+ description: "CUDA toolkit version"
6
+ required: true
7
+
8
+ runs:
9
+ using: "composite"
10
+ steps:
11
+ - name: Install Cuda Toolkit 11.7
12
+ if: ${{ inputs.cuda_version == '11.7' }}
13
+ shell: pwsh
14
+ run: |
15
+ mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
16
+ choco install unzip -y
17
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-11.7.99-archive.zip"
18
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-11.7.99-archive.zip"
19
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-11.7.99-archive.zip"
20
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-11.7.4.6-archive.zip"
21
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-11.7.91-archive.zip"
22
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-11.7.91-archive.zip"
23
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-11.7.101-archive.zip"
24
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-11.7.91-archive.zip"
25
+ unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
26
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cudart-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
27
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvcc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
28
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvrtc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
29
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libcublas-windows-x86_64-11.7.4.6-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
30
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvtx-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
31
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\visual_studio_integration-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
32
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvprof-windows-x86_64-11.7.101-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
33
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cccl-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
34
+ echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
35
+ echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
36
+ echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
37
+ echo "CUDA_PATH_V11_7=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
38
+
39
+ - name: Install Cuda Toolkit 12.4
40
+ if: ${{ inputs.cuda_version == '12.4' }}
41
+ shell: pwsh
42
+ run: |
43
+ mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
44
+ choco install unzip -y
45
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-12.4.127-archive.zip"
46
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-12.4.131-archive.zip"
47
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-12.4.127-archive.zip"
48
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-12.4.5.8-archive.zip"
49
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-12.4.127-archive.zip"
50
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-12.4.127-archive.zip"
51
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-12.4.127-archive.zip"
52
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-12.4.127-archive.zip"
53
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-12.4.127-archive.zip"
54
+ unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
55
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cudart-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
56
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvcc-windows-x86_64-12.4.131-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
57
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvrtc-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
58
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libcublas-windows-x86_64-12.4.5.8-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
59
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvtx-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
60
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_profiler_api-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
61
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\visual_studio_integration-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
62
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvprof-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
63
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cccl-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
64
+ echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
65
+ echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
66
+ echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
67
+ echo "CUDA_PATH_V12_4=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
llama.cpp/.github/actions/windows-setup-curl/action.yml ADDED
@@ -0,0 +1,30 @@
+ name: 'Windows - Setup CURL'
+ description: 'Composite action, to be reused in other workflow'
+ inputs:
+   curl_version:
+     description: 'CURL version'
+     required: false
+     default: '8.6.0_6'
+   architecture:
+     description: 'Architecture of the libcurl to download'
+     required: false
+     default: 'win64'
+ outputs:
+   curl_path:
+     description: "Path to the downloaded libcurl"
+     value: ${{ steps.get_libcurl.outputs.curl_path }}
+
+ runs:
+   using: "composite"
+   steps:
+     - name: libCURL
+       id: get_libcurl
+       shell: powershell
+       env:
+         CURL_VERSION: ${{ inputs.curl_version }}
+         ARCHITECTURE: ${{ inputs.architecture }}
+       run: |
+         curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-${env:ARCHITECTURE}-mingw.zip"
+         mkdir $env:RUNNER_TEMP/libcurl
+         tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl
+         echo "curl_path=$env:RUNNER_TEMP/libcurl" >> $env:GITHUB_OUTPUT
llama.cpp/.github/labeler.yml ADDED
@@ -0,0 +1,94 @@
+ # https://github.com/actions/labeler
+ Apple Metal:
+   - changed-files:
+     - any-glob-to-any-file:
+       - ggml/include/ggml-metal.h
+       - ggml/src/ggml-metal/**
+       - README-metal.md
+ SYCL:
+   - changed-files:
+     - any-glob-to-any-file:
+       - ggml/include/ggml-sycl.h
+       - ggml/src/ggml-sycl/**
+       - docs/backend/SYCL.md
+       - examples/sycl/**
+ Nvidia GPU:
+   - changed-files:
+     - any-glob-to-any-file:
+       - ggml/include/ggml-cuda.h
+       - ggml/src/ggml-cuda/**
+ Vulkan:
+   - changed-files:
+     - any-glob-to-any-file:
+       - ggml/include/ggml-vulkan.h
+       - ggml/src/ggml-vulkan/**
+ documentation:
+   - changed-files:
+     - any-glob-to-any-file:
+       - docs/**
+       - media/**
+ testing:
+   - changed-files:
+     - any-glob-to-any-file:
+       - tests/**
+ build:
+   - changed-files:
+     - any-glob-to-any-file:
+       - cmake/**
+       - CMakeLists.txt
+       - CMakePresets.json
+ examples:
+   - changed-files:
+     - any-glob-to-any-file:
+       - examples/**
+       - tools/**
+ devops:
+   - changed-files:
+     - any-glob-to-any-file:
+       - .devops/**
+       - .github/**
+       - ci/**
+ python:
+   - changed-files:
+     - any-glob-to-any-file:
+       - "**/*.py"
+       - requirements/**
+       - gguf-py/**
+       - .flake8
+ script:
+   - changed-files:
+     - any-glob-to-any-file:
+       - scripts/**
+ android:
+   - changed-files:
+     - any-glob-to-any-file:
+       - examples/llama.android/**
+ server:
+   - changed-files:
+     - any-glob-to-any-file:
+       - tools/server/**
+ ggml:
+   - changed-files:
+     - any-glob-to-any-file:
+       - ggml/**
+ nix:
+   - changed-files:
+     - any-glob-to-any-file:
+       - "**/*.nix"
+       - .github/workflows/nix-*.yml
+       - .devops/nix/nixpkgs-instances.nix
+ embedding:
+   - changed-files:
+     - any-glob-to-any-file: examples/embedding/
+
+ Ascend NPU:
+   - changed-files:
+     - any-glob-to-any-file:
+       - ggml/include/ggml-cann.h
+       - ggml/src/ggml-cann/**
+       - docs/backend/CANN.md
+ OpenCL:
+   - changed-files:
+     - any-glob-to-any-file:
+       - ggml/include/ggml-opencl.h
+       - ggml/src/ggml-opencl/**
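The file above is only the label-to-path mapping; labels are actually applied by a workflow that runs actions/labeler against it. A minimal sketch of such a companion workflow, assuming the standard actions/labeler v5 setup (not part of this upload):

```yaml
# Hypothetical .github/workflows/labeler.yml (illustrative only, not included
# in this commit): actions/labeler v5 reads .github/labeler.yml and applies
# the labels defined above to matching pull requests.
name: "Pull Request Labeler"
on:
  - pull_request_target

jobs:
  labeler:
    permissions:
      contents: read
      pull-requests: write
    runs-on: ubuntu-latest
    steps:
      - uses: actions/labeler@v5
```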
llama.cpp/.github/pull_request_template.md ADDED
@@ -0,0 +1 @@
+ *Make sure to read the [contributing guidelines](https://github.com/ggml-org/llama.cpp/blob/master/CONTRIBUTING.md) before submitting a PR*