GGUF
rohan23998 committed · Commit 5540e6f · verified · 1 parent: 9dfc571

Upload folder using huggingface_hub

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. .clang-format +161 -0
  2. .clang-tidy +27 -0
  3. .devops/cloud-v-pipeline +22 -0
  4. .devops/cpu.Dockerfile +92 -0
  5. .devops/cuda.Dockerfile +94 -0
  6. .devops/intel.Dockerfile +95 -0
  7. .devops/llama-cli-cann.Dockerfile +44 -0
  8. .devops/llama-cpp-cuda.srpm.spec +83 -0
  9. .devops/llama-cpp.srpm.spec +85 -0
  10. .devops/musa.Dockerfile +101 -0
  11. .devops/nix/apps.nix +21 -0
  12. .devops/nix/devshells.nix +52 -0
  13. .devops/nix/docker.nix +37 -0
  14. .devops/nix/jetson-support.nix +39 -0
  15. .devops/nix/nixpkgs-instances.nix +45 -0
  16. .devops/nix/package-gguf-py.nix +36 -0
  17. .devops/nix/package.nix +247 -0
  18. .devops/nix/python-scripts.nix +66 -0
  19. .devops/nix/scope.nix +41 -0
  20. .devops/nix/sif.nix +27 -0
  21. .devops/rocm.Dockerfile +113 -0
  22. .devops/tools.sh +49 -0
  23. .devops/vulkan.Dockerfile +89 -0
  24. .dockerignore +20 -0
  25. .ecrc +6 -0
  26. .editorconfig +54 -0
  27. .flake8 +18 -0
  28. .gitattributes +39 -0
  29. .github/ISSUE_TEMPLATE/010-bug-compilation.yml +87 -0
  30. .github/ISSUE_TEMPLATE/011-bug-results.yml +101 -0
  31. .github/ISSUE_TEMPLATE/019-bug-misc.yml +91 -0
  32. .github/ISSUE_TEMPLATE/020-enhancement.yml +51 -0
  33. .github/ISSUE_TEMPLATE/030-research.yml +52 -0
  34. .github/ISSUE_TEMPLATE/040-refactor.yml +28 -0
  35. .github/ISSUE_TEMPLATE/config.yml +11 -0
  36. .github/actions/get-tag-name/action.yml +22 -0
  37. .github/actions/windows-setup-cuda/action.yml +67 -0
  38. .github/actions/windows-setup-curl/action.yml +30 -0
  39. .github/labeler.yml +95 -0
  40. .github/pull_request_template.md +1 -0
  41. .github/workflows/bench.yml.disabled +304 -0
  42. .github/workflows/build-linux-cross.yml +346 -0
  43. .github/workflows/build.yml +1080 -0
  44. .github/workflows/close-issue.yml +28 -0
  45. .github/workflows/docker.yml +178 -0
  46. .github/workflows/editorconfig.yml +29 -0
  47. .github/workflows/gguf-publish.yml +44 -0
  48. .github/workflows/labeler.yml +17 -0
  49. .github/workflows/python-check-requirements.yml +33 -0
  50. .github/workflows/python-lint.yml +30 -0
.clang-format ADDED
@@ -0,0 +1,161 @@
+ ---
+ Language: Cpp
+ AlignAfterOpenBracket: Align
+ AlignArrayOfStructures: Left
+ AlignConsecutiveAssignments: AcrossComments
+ AlignConsecutiveBitFields: AcrossComments
+ AlignConsecutiveDeclarations: AcrossComments
+ AlignConsecutiveMacros: AcrossComments
+ # AlignConsecutiveShortCaseStatements: AcrossComments
+ AlignEscapedNewlines: Left # LeftWithLastLine
+ AlignOperands: Align
+ AlignTrailingComments:
+   Kind: Always
+   OverEmptyLines: 1
+ AllowAllArgumentsOnNextLine: true
+ AllowAllParametersOfDeclarationOnNextLine: false
+ # AllowBreakBeforeNoexceptSpecifier: OnlyWithParen
+ AllowShortBlocksOnASingleLine: Never
+ AllowShortCaseLabelsOnASingleLine: false
+ AllowShortFunctionsOnASingleLine: Inline
+ AllowShortIfStatementsOnASingleLine: Never
+ AllowShortLambdasOnASingleLine: Inline
+ AllowShortLoopsOnASingleLine: false
+ AlwaysBreakBeforeMultilineStrings: true
+ BinPackArguments: true
+ BinPackParameters: true # OnePerLine
+ BitFieldColonSpacing: Both
+ BreakBeforeBraces: Custom # Attach
+ BraceWrapping:
+   AfterCaseLabel: true
+   AfterClass: false
+   AfterControlStatement: false
+   AfterEnum: false
+   AfterFunction: false
+   AfterNamespace: false
+   AfterObjCDeclaration: false
+   AfterStruct: false
+   AfterUnion: false
+   AfterExternBlock: false
+   BeforeCatch: false
+   BeforeElse: false
+   BeforeLambdaBody: false
+   BeforeWhile: false
+   IndentBraces: false
+   SplitEmptyFunction: false
+   SplitEmptyRecord: false
+   SplitEmptyNamespace: false
+ # BreakAdjacentStringLiterals: true
+ BreakAfterAttributes: Never
+ BreakBeforeBinaryOperators: None
+ BreakBeforeInlineASMColon: OnlyMultiline
+ BreakBeforeTernaryOperators: false
+ # BreakBinaryOperations: Never
+ BreakConstructorInitializers: AfterColon
+ # BreakFunctionDefinitionParameters: false
+ BreakInheritanceList: AfterComma
+ BreakStringLiterals: true
+ # BreakTemplateDeclarations: Yes
+ ColumnLimit: 120
+ CommentPragmas: '^ IWYU pragma:'
+ CompactNamespaces: false
+ ConstructorInitializerIndentWidth: 4
+ ContinuationIndentWidth: 4
+ Cpp11BracedListStyle: false
+ DerivePointerAlignment: false
+ DisableFormat: false
+ EmptyLineBeforeAccessModifier: Leave
+ EmptyLineAfterAccessModifier: Never
+ ExperimentalAutoDetectBinPacking: false
+ FixNamespaceComments: true
+ IncludeBlocks: Regroup
+ IncludeCategories:
+   - Regex: '^<.*\.h>'
+     Priority: 1
+     SortPriority: 0
+   - Regex: '^<.*'
+     Priority: 2
+     SortPriority: 0
+   - Regex: '.*'
+     Priority: 3
+     SortPriority: 0
+ IncludeIsMainRegex: '([-_](test|unittest))?$'
+ IncludeIsMainSourceRegex: ''
+ IndentAccessModifiers: false
+ IndentCaseBlocks: true
+ IndentCaseLabels: true
+ IndentExternBlock: NoIndent
+ IndentGotoLabels: false
+ IndentPPDirectives: AfterHash
+ IndentWidth: 4
+ IndentWrappedFunctionNames: false
+ InsertBraces: true # NOTE: may lead to incorrect formatting
+ InsertNewlineAtEOF: true
+ JavaScriptQuotes: Leave
+ JavaScriptWrapImports: true
+ KeepEmptyLinesAtTheStartOfBlocks: false
+ LambdaBodyIndentation: Signature
+ LineEnding: LF
+ MacroBlockBegin: ''
+ MacroBlockEnd: ''
+ MaxEmptyLinesToKeep: 1
+ NamespaceIndentation: None
+ ObjCBinPackProtocolList: Auto
+ ObjCBlockIndentWidth: 4
+ ObjCSpaceAfterProperty: true
+ ObjCSpaceBeforeProtocolList: true
+ PPIndentWidth: -1
+ PackConstructorInitializers: CurrentLine
+ PenaltyBreakAssignment: 2
+ PenaltyBreakBeforeFirstCallParameter: 1
+ PenaltyBreakComment: 300
+ PenaltyBreakFirstLessLess: 120
+ PenaltyBreakString: 1000
+ PenaltyBreakTemplateDeclaration: 10
+ PenaltyExcessCharacter: 1000000
+ PenaltyReturnTypeOnItsOwnLine: 200
+ PointerAlignment: Middle
+ QualifierAlignment: Left
+ #QualifierOrder: ['static', 'inline', 'friend', 'constexpr', 'const', 'volatile', 'type', 'restrict']
+ RawStringFormats:
+   - Language: Cpp
+     Delimiters:
+       - cc
+       - CC
+       - cpp
+       - Cpp
+       - CPP
+       - 'c++'
+       - 'C++'
+     CanonicalDelimiter: ''
+ ReferenceAlignment: Middle
+ ReflowComments: false # IndentOnly
+ SeparateDefinitionBlocks: Always
+ SortIncludes: CaseInsensitive
+ SortUsingDeclarations: LexicographicNumeric
+ SpaceAfterCStyleCast: true
+ SpaceAfterLogicalNot: false
+ SpaceAfterTemplateKeyword: true
+ SpaceBeforeAssignmentOperators: true
+ SpaceBeforeCpp11BracedList: false
+ SpaceBeforeCtorInitializerColon: true
+ SpaceBeforeInheritanceColon: true
+ SpaceBeforeParens: ControlStatements
+ SpaceBeforeRangeBasedForLoopColon: true
+ SpaceInEmptyBlock: false
+ SpaceInEmptyParentheses: false
+ SpacesBeforeTrailingComments: 2
+ SpacesInAngles: Never
+ SpacesInContainerLiterals: true
+ SpacesInLineCommentPrefix:
+   Minimum: 1
+   Maximum: -1
+ SpacesInParentheses: false
+ SpacesInSquareBrackets: false
+ SpaceBeforeSquareBrackets: false
+ Standard: c++17
+ TabWidth: 4
+ UseTab: Never
+ WhitespaceSensitiveMacros: ['STRINGIZE']
+ ...
+
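
This style can be applied locally before committing; a minimal sketch, assuming a reasonably recent clang-format (15+) is on PATH, since older releases reject newer keys such as the AlignTrailingComments map:

    # Format a single file in place using the repository's .clang-format
    clang-format -i src/llama.cpp

    # Check formatting without modifying the file (non-zero exit on violations)
    clang-format --dry-run --Werror src/llama.cpp
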
.clang-tidy ADDED
@@ -0,0 +1,27 @@
+ ---
+ Checks: >
+     bugprone-*,
+     -bugprone-easily-swappable-parameters,
+     -bugprone-implicit-widening-of-multiplication-result,
+     -bugprone-misplaced-widening-cast,
+     -bugprone-narrowing-conversions,
+     readability-*,
+     -readability-avoid-unconditional-preprocessor-if,
+     -readability-function-cognitive-complexity,
+     -readability-identifier-length,
+     -readability-implicit-bool-conversion,
+     -readability-magic-numbers,
+     -readability-uppercase-literal-suffix,
+     -readability-simplify-boolean-expr,
+     -readability-math-missing-parentheses,
+     clang-analyzer-*,
+     -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
+     performance-*,
+     portability-*,
+     -portability-simd-intrinsics,
+     misc-*,
+     -misc-const-correctness,
+     -misc-non-private-member-variables-in-classes,
+     -misc-no-recursion,
+     -misc-use-anonymous-namespace,
+ FormatStyle: none
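
A minimal sketch of running these checks, assuming a CMake build directory with an exported compile database (the file path is illustrative):

    # Generate compile_commands.json so clang-tidy can resolve compiler flags
    cmake -B build -DCMAKE_EXPORT_COMPILE_COMMANDS=ON

    # Run the checks from .clang-tidy against one translation unit
    clang-tidy -p build src/llama.cpp
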
.devops/cloud-v-pipeline ADDED
@@ -0,0 +1,22 @@
+ node('x86_runner1'){ // Running on x86 runner containing latest vector qemu, latest vector gcc and all the necessary libraries
+     stage('Cleanup'){
+         cleanWs() // Cleaning previous CI build in workspace
+     }
+     stage('checkout repo'){
+         retry(5){ // Retry if the cloning fails due to some reason
+             checkout scm // Clone the repo on Runner
+         }
+     }
+     stage('Compiling llama.cpp'){
+         sh'''#!/bin/bash
+         make RISCV=1 RISCV_CROSS_COMPILE=1 # Compiling llama for RISC-V
+         '''
+     }
+     stage('Running llama.cpp'){
+         sh'''#!/bin/bash
+         module load gnu-bin2/0.1 # loading latest versions of vector qemu and vector gcc
+         qemu-riscv64 -L /softwares/gnu-bin2/sysroot -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 ./llama-cli -m /home/alitariq/codellama-7b.Q4_K_M.gguf -p "Anything" -n 9 > llama_log.txt # Running llama.cpp on vector qemu-riscv64
+         cat llama_log.txt # Printing results
+         '''
+     }
+ }
.devops/cpu.Dockerfile ADDED
@@ -0,0 +1,92 @@
+ ARG UBUNTU_VERSION=22.04
+
+ FROM ubuntu:$UBUNTU_VERSION AS build
+
+ ARG TARGETARCH
+
+ ARG GGML_CPU_ARM_ARCH=armv8-a
+
+ RUN apt-get update && \
+     apt-get install -y build-essential git cmake libcurl4-openssl-dev
+
+ WORKDIR /app
+
+ COPY . .
+
+ RUN if [ "$TARGETARCH" = "amd64" ]; then \
+         cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
+     elif [ "$TARGETARCH" = "arm64" ]; then \
+         cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \
+     else \
+         echo "Unsupported architecture"; \
+         exit 1; \
+     fi && \
+     cmake --build build -j $(nproc)
+
+ RUN mkdir -p /app/lib && \
+     find build -name "*.so" -exec cp {} /app/lib \;
+
+ RUN mkdir -p /app/full \
+     && cp build/bin/* /app/full \
+     && cp *.py /app/full \
+     && cp -r gguf-py /app/full \
+     && cp -r requirements /app/full \
+     && cp requirements.txt /app/full \
+     && cp .devops/tools.sh /app/full/tools.sh
+
+ ## Base image
+ FROM ubuntu:$UBUNTU_VERSION AS base
+
+ RUN apt-get update \
+     && apt-get install -y libgomp1 curl \
+     && apt autoremove -y \
+     && apt clean -y \
+     && rm -rf /tmp/* /var/tmp/* \
+     && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
+     && find /var/cache -type f -delete
+
+ COPY --from=build /app/lib/ /app
+
+ ### Full
+ FROM base AS full
+
+ COPY --from=build /app/full /app
+
+ WORKDIR /app
+
+ RUN apt-get update \
+     && apt-get install -y \
+     git \
+     python3 \
+     python3-pip \
+     && pip install --upgrade pip setuptools wheel \
+     && pip install -r requirements.txt \
+     && apt autoremove -y \
+     && apt clean -y \
+     && rm -rf /tmp/* /var/tmp/* \
+     && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
+     && find /var/cache -type f -delete
+
+ ENTRYPOINT ["/app/tools.sh"]
+
+ ### Light, CLI only
+ FROM base AS light
+
+ COPY --from=build /app/full/llama-cli /app
+
+ WORKDIR /app
+
+ ENTRYPOINT [ "/app/llama-cli" ]
+
+ ### Server, Server only
+ FROM base AS server
+
+ ENV LLAMA_ARG_HOST=0.0.0.0
+
+ COPY --from=build /app/full/llama-server /app
+
+ WORKDIR /app
+
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
+
+ ENTRYPOINT [ "/app/llama-server" ]
.devops/cuda.Dockerfile ADDED
@@ -0,0 +1,94 @@
+ ARG UBUNTU_VERSION=22.04
+ # This needs to generally match the container host's environment.
+ ARG CUDA_VERSION=12.4.0
+ # Target the CUDA build image
+ ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+
+ ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
+
+ FROM ${BASE_CUDA_DEV_CONTAINER} AS build
+
+ # CUDA architecture to build for (defaults to all supported archs)
+ ARG CUDA_DOCKER_ARCH=default
+
+ RUN apt-get update && \
+     apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1
+
+ WORKDIR /app
+
+ COPY . .
+
+ RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
+         export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
+     fi && \
+     cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+     cmake --build build --config Release -j$(nproc)
+
+ RUN mkdir -p /app/lib && \
+     find build -name "*.so" -exec cp {} /app/lib \;
+
+ RUN mkdir -p /app/full \
+     && cp build/bin/* /app/full \
+     && cp *.py /app/full \
+     && cp -r gguf-py /app/full \
+     && cp -r requirements /app/full \
+     && cp requirements.txt /app/full \
+     && cp .devops/tools.sh /app/full/tools.sh
+
+ ## Base image
+ FROM ${BASE_CUDA_RUN_CONTAINER} AS base
+
+ RUN apt-get update \
+     && apt-get install -y libgomp1 curl \
+     && apt autoremove -y \
+     && apt clean -y \
+     && rm -rf /tmp/* /var/tmp/* \
+     && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
+     && find /var/cache -type f -delete
+
+ COPY --from=build /app/lib/ /app
+
+ ### Full
+ FROM base AS full
+
+ COPY --from=build /app/full /app
+
+ WORKDIR /app
+
+ RUN apt-get update \
+     && apt-get install -y \
+     git \
+     python3 \
+     python3-pip \
+     && pip install --upgrade pip setuptools wheel \
+     && pip install -r requirements.txt \
+     && apt autoremove -y \
+     && apt clean -y \
+     && rm -rf /tmp/* /var/tmp/* \
+     && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
+     && find /var/cache -type f -delete
+
+ ENTRYPOINT ["/app/tools.sh"]
+
+ ### Light, CLI only
+ FROM base AS light
+
+ COPY --from=build /app/full/llama-cli /app
+
+ WORKDIR /app
+
+ ENTRYPOINT [ "/app/llama-cli" ]
+
+ ### Server, Server only
+ FROM base AS server
+
+ ENV LLAMA_ARG_HOST=0.0.0.0
+
+ COPY --from=build /app/full/llama-server /app
+
+ WORKDIR /app
+
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
+
+ ENTRYPOINT [ "/app/llama-server" ]
.devops/intel.Dockerfile ADDED
@@ -0,0 +1,95 @@
+ ARG ONEAPI_VERSION=2025.1.1-0-devel-ubuntu24.04
+
+ ## Build Image
+
+ FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build
+
+ ARG GGML_SYCL_F16=OFF
+ RUN apt-get update && \
+     apt-get install -y git libcurl4-openssl-dev
+
+ WORKDIR /app
+
+ COPY . .
+
+ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
+         echo "GGML_SYCL_F16 is set" \
+         && export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
+     fi && \
+     echo "Building with dynamic libs" && \
+     cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${OPT_SYCL_F16} && \
+     cmake --build build --config Release -j$(nproc)
+
+ RUN mkdir -p /app/lib && \
+     find build -name "*.so" -exec cp {} /app/lib \;
+
+ RUN mkdir -p /app/full \
+     && cp build/bin/* /app/full \
+     && cp *.py /app/full \
+     && cp -r gguf-py /app/full \
+     && cp -r requirements /app/full \
+     && cp requirements.txt /app/full \
+     && cp .devops/tools.sh /app/full/tools.sh
+
+ FROM intel/oneapi-basekit:$ONEAPI_VERSION AS base
+
+ RUN apt-get update \
+     && apt-get install -y libgomp1 curl \
+     && apt autoremove -y \
+     && apt clean -y \
+     && rm -rf /tmp/* /var/tmp/* \
+     && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
+     && find /var/cache -type f -delete
+
+ ### Full
+ FROM base AS full
+
+ COPY --from=build /app/lib/ /app
+ COPY --from=build /app/full /app
+
+ WORKDIR /app
+
+ RUN apt-get update && \
+     apt-get install -y \
+     git \
+     python3 \
+     python3-pip \
+     python3-venv && \
+     python3 -m venv /opt/venv && \
+     . /opt/venv/bin/activate && \
+     pip install --upgrade pip setuptools wheel && \
+     pip install -r requirements.txt && \
+     apt autoremove -y && \
+     apt clean -y && \
+     rm -rf /tmp/* /var/tmp/* && \
+     find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
+     find /var/cache -type f -delete
+
+ ENV PATH="/opt/venv/bin:$PATH"
+
+ ENTRYPOINT ["/app/tools.sh"]
+
+ ### Light, CLI only
+ FROM base AS light
+
+ COPY --from=build /app/lib/ /app
+ COPY --from=build /app/full/llama-cli /app
+
+ WORKDIR /app
+
+ ENTRYPOINT [ "/app/llama-cli" ]
+
+ ### Server, Server only
+ FROM base AS server
+
+ ENV LLAMA_ARG_HOST=0.0.0.0
+
+ COPY --from=build /app/lib/ /app
+ COPY --from=build /app/full/llama-server /app
+
+ WORKDIR /app
+
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
+
+ ENTRYPOINT [ "/app/llama-server" ]
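
A sketch of enabling FP16 SYCL kernels through the GGML_SYCL_F16 build argument; the tag and model path are illustrative, and running assumes the host's Intel GPU devices are passed through via /dev/dri:

    docker build -f .devops/intel.Dockerfile --build-arg GGML_SYCL_F16=ON --target light -t llama.cpp:light-intel .
    docker run --device /dev/dri -v "$PWD/models:/models" llama.cpp:light-intel -m /models/model.gguf -p "Hello" -n 32
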
.devops/llama-cli-cann.Dockerfile ADDED
@@ -0,0 +1,44 @@
+ ARG ASCEND_VERSION=8.1.RC1.alpha001-910b-openeuler22.03-py3.10
+
+ FROM ascendai/cann:$ASCEND_VERSION AS build
+
+ WORKDIR /app
+
+ COPY . .
+
+ RUN yum install -y gcc g++ cmake make libcurl-devel
+ ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
+ ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
+ ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
+ ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
+ ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
+ ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
+ ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
+ ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
+ ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}
+
+ # Find libascend_hal.so via the stub path, because the driver hasn't been mounted at build time.
+ ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH
+
+ RUN echo "Building with static libs" && \
+     source /usr/local/Ascend/ascend-toolkit/set_env.sh --force && \
+     cmake -B build -DGGML_NATIVE=OFF -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_TESTS=OFF && \
+     cmake --build build --config Release --target llama-cli
+
+ # TODO: use image with NNRT
+ FROM ascendai/cann:$ASCEND_VERSION AS runtime
+ COPY --from=build /app/build/bin/llama-cli /llama-cli
+
+ ENV LC_ALL=C.utf8
+
+ ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
+ ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
+ ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
+ ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
+ ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
+ ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
+ ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
+ ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
+ ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}
+
+ ENTRYPOINT [ "/llama-cli" ]
.devops/llama-cpp-cuda.srpm.spec ADDED
@@ -0,0 +1,83 @@
+ # SRPM for building from source and packaging an RPM for RPM-based distros.
+ # https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
+ # Built and maintained by John Boero - boeroboy@gmail.com
+ # In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
+
+ # Notes for llama.cpp:
+ # 1. Tags are currently based on hash - which will not sort asciibetically.
+ #    We need to declare standard versioning if people want to sort latest releases.
+ # 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
+ # 3. NVIDIA's developer repo must be enabled with nvcc, cublas, clblas, etc. installed.
+ #    Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
+ # 4. OpenCL/CLBLAST support simply requires the ICD loader and basic OpenCL libraries.
+ #    It is up to the user to install the correct vendor-specific support.
+
+ Name:           llama.cpp-cuda
+ Version:        %( date "+%%Y%%m%%d" )
+ Release:        1%{?dist}
+ Summary:        Inference of LLaMA model in pure C/C++, CUDA-accelerated build
+ License:        MIT
+ Source0:        https://github.com/ggml-org/llama.cpp/archive/refs/heads/master.tar.gz
+ BuildRequires:  coreutils make gcc-c++ git cuda-toolkit
+ Requires:       cuda-toolkit
+ URL:            https://github.com/ggml-org/llama.cpp
+
+ %define debug_package %{nil}
+ %define source_date_epoch_from_changelog 0
+
+ %description
+ CUDA-accelerated inference for Meta's Llama 2 models using default options.
+
+ %prep
+ %setup -n llama.cpp-master
+
+ %build
+ make -j GGML_CUDA=1
+
+ %install
+ mkdir -p %{buildroot}%{_bindir}/
+ cp -p llama-cli %{buildroot}%{_bindir}/llama-cuda-cli
+ cp -p llama-server %{buildroot}%{_bindir}/llama-cuda-server
+ cp -p llama-simple %{buildroot}%{_bindir}/llama-cuda-simple
+
+ mkdir -p %{buildroot}/usr/lib/systemd/system
+ %{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamacuda.service
+ [Unit]
+ Description=Llama.cpp server, CUDA build.
+ After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target
+
+ [Service]
+ Type=simple
+ EnvironmentFile=/etc/sysconfig/llama
+ ExecStart=/usr/bin/llama-cuda-server $LLAMA_ARGS
+ ExecReload=/bin/kill -s HUP $MAINPID
+ Restart=never
+
+ [Install]
+ WantedBy=default.target
+ EOF
+
+ mkdir -p %{buildroot}/etc/sysconfig
+ %{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
+ LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
+ EOF
+
+ %clean
+ rm -rf %{buildroot}
+ rm -rf %{_builddir}/*
+
+ %files
+ %{_bindir}/llama-cuda-cli
+ %{_bindir}/llama-cuda-server
+ %{_bindir}/llama-cuda-simple
+ /usr/lib/systemd/system/llamacuda.service
+ %config /etc/sysconfig/llama
+
+ %pre
+
+ %post
+
+ %preun
+ %postun
+
+ %changelog
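
A sketch of building an RPM from this spec, assuming rpmbuild and the rpmdevtools layout are available (the spec fetches the master tarball named in Source0):

    rpmdev-setuptree                                 # creates the ~/rpmbuild/{SOURCES,SPECS,...} tree
    spectool -g -R .devops/llama-cpp-cuda.srpm.spec  # download Source0 into SOURCES
    rpmbuild -ba .devops/llama-cpp-cuda.srpm.spec    # build the binary and source RPMs
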
.devops/llama-cpp.srpm.spec ADDED
@@ -0,0 +1,85 @@
+ # SRPM for building from source and packaging an RPM for RPM-based distros.
+ # https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
+ # Built and maintained by John Boero - boeroboy@gmail.com
+ # In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
+
+ # Notes for llama.cpp:
+ # 1. Tags are currently based on hash - which will not sort asciibetically.
+ #    We need to declare standard versioning if people want to sort latest releases.
+ #    In the meantime, YYYYMMDD format will be used.
+ # 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
+ # 3. NVIDIA's developer repo must be enabled with nvcc, cublas, clblas, etc. installed.
+ #    Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
+ # 4. OpenCL/CLBLAST support simply requires the ICD loader and basic OpenCL libraries.
+ #    It is up to the user to install the correct vendor-specific support.
+
+ Name:           llama.cpp
+ Version:        %( date "+%%Y%%m%%d" )
+ Release:        1%{?dist}
+ Summary:        CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
+ License:        MIT
+ Source0:        https://github.com/ggml-org/llama.cpp/archive/refs/heads/master.tar.gz
+ BuildRequires:  coreutils make gcc-c++ git libstdc++-devel
+ Requires:       libstdc++
+ URL:            https://github.com/ggml-org/llama.cpp
+
+ %define debug_package %{nil}
+ %define source_date_epoch_from_changelog 0
+
+ %description
+ CPU inference for Meta's Llama 2 models using default options.
+ Models are not included in this package and must be downloaded separately.
+
+ %prep
+ %setup -n llama.cpp-master
+
+ %build
+ make -j
+
+ %install
+ mkdir -p %{buildroot}%{_bindir}/
+ cp -p llama-cli %{buildroot}%{_bindir}/llama-cli
+ cp -p llama-server %{buildroot}%{_bindir}/llama-server
+ cp -p llama-simple %{buildroot}%{_bindir}/llama-simple
+
+ mkdir -p %{buildroot}/usr/lib/systemd/system
+ %{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llama.service
+ [Unit]
+ Description=Llama.cpp server, CPU only (no GPU support in this build).
+ After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target
+
+ [Service]
+ Type=simple
+ EnvironmentFile=/etc/sysconfig/llama
+ ExecStart=/usr/bin/llama-server $LLAMA_ARGS
+ ExecReload=/bin/kill -s HUP $MAINPID
+ Restart=never
+
+ [Install]
+ WantedBy=default.target
+ EOF
+
+ mkdir -p %{buildroot}/etc/sysconfig
+ %{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
+ LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
+ EOF
+
+ %clean
+ rm -rf %{buildroot}
+ rm -rf %{_builddir}/*
+
+ %files
+ %{_bindir}/llama-cli
+ %{_bindir}/llama-server
+ %{_bindir}/llama-simple
+ /usr/lib/systemd/system/llama.service
+ %config /etc/sysconfig/llama
+
+ %pre
+
+ %post
+
+ %preun
+ %postun
+
+ %changelog
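
Once the RPM is installed, the packaged unit can be pointed at a model and started; a sketch, with the model path illustrative and configured through the EnvironmentFile the package installs:

    echo 'LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"' | sudo tee /etc/sysconfig/llama
    sudo systemctl daemon-reload
    sudo systemctl enable --now llama.service
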
.devops/musa.Dockerfile ADDED
@@ -0,0 +1,101 @@
+ ARG UBUNTU_VERSION=22.04
+ # This needs to generally match the container host's environment.
+ ARG MUSA_VERSION=rc4.0.1
+ # Target the MUSA build image
+ ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-mudnn-devel-ubuntu${UBUNTU_VERSION}
+
+ ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-mudnn-runtime-ubuntu${UBUNTU_VERSION}
+
+ FROM ${BASE_MUSA_DEV_CONTAINER} AS build
+
+ # MUSA architecture to build for (defaults to all supported archs)
+ ARG MUSA_DOCKER_ARCH=default
+
+ RUN apt-get update && \
+     apt-get install -y \
+     build-essential \
+     cmake \
+     python3 \
+     python3-pip \
+     git \
+     libcurl4-openssl-dev \
+     libgomp1
+
+ WORKDIR /app
+
+ COPY . .
+
+ RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
+         export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
+     fi && \
+     cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+     cmake --build build --config Release -j$(nproc)
+
+ RUN mkdir -p /app/lib && \
+     find build -name "*.so" -exec cp {} /app/lib \;
+
+ RUN mkdir -p /app/full \
+     && cp build/bin/* /app/full \
+     && cp *.py /app/full \
+     && cp -r gguf-py /app/full \
+     && cp -r requirements /app/full \
+     && cp requirements.txt /app/full \
+     && cp .devops/tools.sh /app/full/tools.sh
+
+ ## Base image
+ FROM ${BASE_MUSA_RUN_CONTAINER} AS base
+
+ RUN apt-get update \
+     && apt-get install -y libgomp1 curl \
+     && apt autoremove -y \
+     && apt clean -y \
+     && rm -rf /tmp/* /var/tmp/* \
+     && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
+     && find /var/cache -type f -delete
+
+ COPY --from=build /app/lib/ /app
+
+ ### Full
+ FROM base AS full
+
+ COPY --from=build /app/full /app
+
+ WORKDIR /app
+
+ RUN apt-get update \
+     && apt-get install -y \
+     git \
+     python3 \
+     python3-pip \
+     && pip install --upgrade pip setuptools wheel \
+     && pip install -r requirements.txt \
+     && apt autoremove -y \
+     && apt clean -y \
+     && rm -rf /tmp/* /var/tmp/* \
+     && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
+     && find /var/cache -type f -delete
+
+ ENTRYPOINT ["/app/tools.sh"]
+
+ ### Light, CLI only
+ FROM base AS light
+
+ COPY --from=build /app/full/llama-cli /app
+
+ WORKDIR /app
+
+ ENTRYPOINT [ "/app/llama-cli" ]
+
+ ### Server, Server only
+ FROM base AS server
+
+ ENV LLAMA_ARG_HOST=0.0.0.0
+
+ COPY --from=build /app/full/llama-server /app
+
+ WORKDIR /app
+
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
+
+ ENTRYPOINT [ "/app/llama-server" ]
.devops/nix/apps.nix ADDED
@@ -0,0 +1,21 @@
+ {
+   perSystem =
+     { config, lib, ... }:
+     {
+       apps =
+         let
+           inherit (config.packages) default;
+           binaries = [
+             "llama-cli"
+             "llama-embedding"
+             "llama-server"
+             "llama-quantize"
+           ];
+           mkApp = name: {
+             type = "app";
+             program = "${default}/bin/${name}";
+           };
+         in
+         lib.genAttrs binaries mkApp;
+     };
+ }
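
Since an app is generated per binary, each should be runnable directly from the flake; a sketch, assuming a flake-enabled Nix and a checkout of the repository (the model path is illustrative):

    nix run .#llama-cli -- -m models/model.gguf -p "Hello" -n 32
    nix run .#llama-server -- -m models/model.gguf --port 8080
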
.devops/nix/devshells.nix ADDED
@@ -0,0 +1,52 @@
+ { inputs, ... }:
+
+ {
+   perSystem =
+     {
+       config,
+       lib,
+       system,
+       ...
+     }:
+     {
+       devShells =
+         let
+           pkgs = import inputs.nixpkgs { inherit system; };
+           stdenv = pkgs.stdenv;
+           scripts = config.packages.python-scripts;
+         in
+         lib.pipe (config.packages) [
+           (lib.concatMapAttrs (
+             name: package: {
+               ${name} = pkgs.mkShell {
+                 name = "${name}";
+                 inputsFrom = [ package ];
+                 shellHook = ''
+                   echo "Entering ${name} devShell"
+                 '';
+               };
+               "${name}-extra" =
+                 if (name == "python-scripts") then
+                   null
+                 else
+                   pkgs.mkShell {
+                     name = "${name}-extra";
+                     inputsFrom = [
+                       package
+                       scripts
+                     ];
+                     # Extra packages that *may* be used by some scripts
+                     packages = [
+                       pkgs.python3Packages.tiktoken
+                     ];
+                     shellHook = ''
+                       echo "Entering ${name} devShell"
+                       addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib stdenv.cc.cc}/lib"
+                     '';
+                   };
+             }
+           ))
+           (lib.filterAttrs (name: value: value != null))
+         ];
+     };
+ }
.devops/nix/docker.nix ADDED
@@ -0,0 +1,37 @@
+ {
+   lib,
+   dockerTools,
+   buildEnv,
+   llama-cpp,
+   interactive ? true,
+   coreutils,
+ }:
+
+ # A tar that can be fed into `docker load`:
+ #
+ # $ nix build .#llamaPackages.docker
+ # $ docker load < result
+
+ # For details and variations cf.
+ # - https://nixos.org/manual/nixpkgs/unstable/#ssec-pkgs-dockerTools-buildLayeredImage
+ # - https://discourse.nixos.org/t/a-faster-dockertools-buildimage-prototype/16922
+ # - https://nixery.dev/
+
+ # Approximate (compressed) sizes, at the time of writing, are:
+ #
+ # .#llamaPackages.docker: 125M;
+ # .#llamaPackagesCuda.docker: 537M;
+ # .#legacyPackages.aarch64-linux.llamaPackagesXavier.docker: 415M.
+
+ dockerTools.buildLayeredImage {
+   name = llama-cpp.pname;
+   tag = "latest";
+
+   contents =
+     [ llama-cpp ]
+     ++ lib.optionals interactive [
+       coreutils
+       dockerTools.binSh
+       dockerTools.caCertificates
+     ];
+ }
.devops/nix/jetson-support.nix ADDED
@@ -0,0 +1,39 @@
+ { inputs, ... }:
+ {
+   perSystem =
+     {
+       config,
+       system,
+       lib,
+       pkgsCuda,
+       ...
+     }:
+     {
+       legacyPackages =
+         let
+           caps.llamaPackagesXavier = "7.2";
+           caps.llamaPackagesOrin = "8.7";
+           caps.llamaPackagesTX2 = "6.2";
+           caps.llamaPackagesNano = "5.3";
+
+           pkgsFor =
+             cap:
+             import inputs.nixpkgs {
+               inherit system;
+               config = {
+                 cudaSupport = true;
+                 cudaCapabilities = [ cap ];
+                 cudaEnableForwardCompat = false;
+                 inherit (pkgsCuda.config) allowUnfreePredicate;
+               };
+             };
+         in
+         builtins.mapAttrs (name: cap: (pkgsFor cap).callPackage ./scope.nix { }) caps;
+
+       packages = lib.optionalAttrs (system == "aarch64-linux") {
+         jetson-xavier = config.legacyPackages.llamaPackagesXavier.llama-cpp;
+         jetson-orin = config.legacyPackages.llamaPackagesOrin.llama-cpp;
+         jetson-nano = config.legacyPackages.llamaPackagesNano.llama-cpp;
+       };
+     };
+ }
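
On an aarch64-linux builder these show up as ordinary flake packages keyed by Jetson model; a sketch:

    # Build llama.cpp pinned to CUDA capability 8.7 for Jetson Orin
    nix build .#jetson-orin
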
.devops/nix/nixpkgs-instances.nix ADDED
@@ -0,0 +1,45 @@
+ { inputs, ... }:
+ {
+   # The _module.args definitions are passed on to modules as arguments. E.g.
+   # the module `{ pkgs ... }: { /* config */ }` implicitly uses
+   # `_module.args.pkgs` (defined in this case by flake-parts).
+   perSystem =
+     { system, ... }:
+     {
+       _module.args = {
+         # Note: bringing up https://zimbatm.com/notes/1000-instances-of-nixpkgs
+         # again, the below creates several nixpkgs instances which the
+         # flake-centric CLI will be forced to evaluate e.g. on `nix flake show`.
+         #
+         # This is currently "slow" and "expensive", on a certain scale.
+         # This also isn't "right" in that this hinders dependency injection at
+         # the level of flake inputs. This might get removed in the foreseeable
+         # future.
+         #
+         # Note that you can use these expressions without Nix
+         # (`pkgs.callPackage ./devops/nix/scope.nix { }` is the entry point).
+
+         pkgsCuda = import inputs.nixpkgs {
+           inherit system;
+           # Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc,
+           # and ucx are built with CUDA support)
+           config.cudaSupport = true;
+           config.allowUnfreePredicate =
+             p:
+             builtins.all (
+               license:
+               license.free
+               || builtins.elem license.shortName [
+                 "CUDA EULA"
+                 "cuDNN EULA"
+               ]
+             ) (p.meta.licenses or [ p.meta.license ]);
+         };
+         # Ensure dependencies use ROCm consistently
+         pkgsRocm = import inputs.nixpkgs {
+           inherit system;
+           config.rocmSupport = true;
+         };
+       };
+     };
+ }
.devops/nix/package-gguf-py.nix ADDED
@@ -0,0 +1,36 @@
+ {
+   lib,
+   llamaVersion,
+   numpy,
+   tqdm,
+   sentencepiece,
+   pyyaml,
+   poetry-core,
+   buildPythonPackage,
+   pytestCheckHook,
+ }:
+
+ buildPythonPackage {
+   pname = "gguf";
+   version = llamaVersion;
+   pyproject = true;
+   nativeBuildInputs = [ poetry-core ];
+   propagatedBuildInputs = [
+     numpy
+     tqdm
+     sentencepiece
+     pyyaml
+   ];
+   src = lib.cleanSource ../../gguf-py;
+   pythonImportsCheck = [
+     "numpy"
+     "gguf"
+   ];
+   nativeCheckInputs = [ pytestCheckHook ];
+   doCheck = true;
+   meta = with lib; {
+     description = "Python package for writing binary files in the GGUF format";
+     license = licenses.mit;
+     maintainers = [ maintainers.ditsuke ];
+   };
+ }
.devops/nix/package.nix ADDED
@@ -0,0 +1,247 @@
+ {
+   lib,
+   glibc,
+   config,
+   stdenv,
+   runCommand,
+   cmake,
+   ninja,
+   pkg-config,
+   git,
+   mpi,
+   blas,
+   cudaPackages,
+   autoAddDriverRunpath,
+   darwin,
+   rocmPackages,
+   vulkan-headers,
+   vulkan-loader,
+   curl,
+   shaderc,
+   useBlas ?
+     builtins.all (x: !x) [
+       useCuda
+       useMetalKit
+       useRocm
+       useVulkan
+     ]
+     && blas.meta.available,
+   useCuda ? config.cudaSupport,
+   useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
+   # Increases the runtime closure size by ~700M
+   useMpi ? false,
+   useRocm ? config.rocmSupport,
+   rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets,
+   enableCurl ? true,
+   useVulkan ? false,
+   llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
+
+   # It's necessary to consistently use backendStdenv when building with CUDA support,
+   # otherwise we get libstdc++ errors downstream.
+   effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
+   enableStatic ? effectiveStdenv.hostPlatform.isStatic,
+   precompileMetalShaders ? false,
+ }:
+
+ let
+   inherit (lib)
+     cmakeBool
+     cmakeFeature
+     optionals
+     strings
+     ;
+
+   stdenv = throw "Use effectiveStdenv instead";
+
+   suffices =
+     lib.optionals useBlas [ "BLAS" ]
+     ++ lib.optionals useCuda [ "CUDA" ]
+     ++ lib.optionals useMetalKit [ "MetalKit" ]
+     ++ lib.optionals useMpi [ "MPI" ]
+     ++ lib.optionals useRocm [ "ROCm" ]
+     ++ lib.optionals useVulkan [ "Vulkan" ];
+
+   pnameSuffix =
+     strings.optionalString (suffices != [ ])
+       "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
+   descriptionSuffix = strings.optionalString (
+     suffices != [ ]
+   ) ", accelerated with ${strings.concatStringsSep ", " suffices}";
+
+   xcrunHost = runCommand "xcrunHost" { } ''
+     mkdir -p $out/bin
+     ln -s /usr/bin/xcrun $out/bin
+   '';
+
+   # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
+   # separately
+   darwinBuildInputs =
+     with darwin.apple_sdk.frameworks;
+     [
+       Accelerate
+       CoreVideo
+       CoreGraphics
+     ]
+     ++ optionals useMetalKit [ MetalKit ];
+
+   cudaBuildInputs = with cudaPackages; [
+     cuda_cudart
+     cuda_cccl # <nv/target>
+     libcublas
+   ];
+
+   rocmBuildInputs = with rocmPackages; [
+     clr
+     hipblas
+     rocblas
+   ];
+
+   vulkanBuildInputs = [
+     vulkan-headers
+     vulkan-loader
+     shaderc
+   ];
+ in
+
+ effectiveStdenv.mkDerivation (finalAttrs: {
+   pname = "llama-cpp${pnameSuffix}";
+   version = llamaVersion;
+
+   # Note: none of the files discarded here are visible in the sandbox or
+   # affect the output hash. This also means they can be modified without
+   # triggering a rebuild.
+   src = lib.cleanSourceWith {
+     filter =
+       name: type:
+       let
+         noneOf = builtins.all (x: !x);
+         baseName = baseNameOf name;
+       in
+       noneOf [
+         (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
+         (lib.hasSuffix ".md" name) # Ignore *.md changes when computing outPaths
+         (lib.hasPrefix "." baseName) # Skip hidden files and directories
+         (baseName == "flake.lock")
+       ];
+     src = lib.cleanSource ../../.;
+   };
+
+   postPatch = ''
+     substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
+       --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
+     substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
+       --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
+   '';
+
+   # With PR#6015 https://github.com/ggml-org/llama.cpp/pull/6015,
+   # `default.metallib` may be compiled with the Metal compiler from Xcode,
+   # and we need to escape the sandbox on macOS to access the Metal compiler.
+   # `xcrun` is used to find the path of the Metal compiler, which is variable
+   # and not on $PATH;
+   # see https://github.com/ggml-org/llama.cpp/pull/6118 for discussion.
+   __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;
+
+   nativeBuildInputs =
+     [
+       cmake
+       ninja
+       pkg-config
+       git
+     ]
+     ++ optionals useCuda [
+       cudaPackages.cuda_nvcc
+
+       autoAddDriverRunpath
+     ]
+     ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ]
+     ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ];
+
+   buildInputs =
+     optionals effectiveStdenv.isDarwin darwinBuildInputs
+     ++ optionals useCuda cudaBuildInputs
+     ++ optionals useMpi [ mpi ]
+     ++ optionals useRocm rocmBuildInputs
+     ++ optionals useBlas [ blas ]
+     ++ optionals useVulkan vulkanBuildInputs
+     ++ optionals enableCurl [ curl ];
+
+   cmakeFlags =
+     [
+       (cmakeBool "LLAMA_BUILD_SERVER" true)
+       (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
+       (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
+       (cmakeBool "LLAMA_CURL" enableCurl)
+       (cmakeBool "GGML_NATIVE" false)
+       (cmakeBool "GGML_BLAS" useBlas)
+       (cmakeBool "GGML_CUDA" useCuda)
+       (cmakeBool "GGML_HIP" useRocm)
+       (cmakeBool "GGML_METAL" useMetalKit)
+       (cmakeBool "GGML_VULKAN" useVulkan)
+       (cmakeBool "GGML_STATIC" enableStatic)
+     ]
+     ++ optionals useCuda [
+       (
+         with cudaPackages.flags;
+         cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
+           builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
+         )
+       )
+     ]
+     ++ optionals useRocm [
+       (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
+       (cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmGpuTargets)
+     ]
+     ++ optionals useMetalKit [
+       (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
+       (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
+     ];
+
+   # Environment variables needed for ROCm
+   env = optionals useRocm {
+     ROCM_PATH = "${rocmPackages.clr}";
+     HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
+   };
+
+   # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
+   # if they haven't been added yet.
+   postInstall = ''
+     mkdir -p $out/include
+     cp $src/include/llama.h $out/include/
+   '';
+
+   meta = {
+     # Configurations we don't want even the CI to evaluate. Results in the
+     # "unsupported platform" messages. This is mostly a no-op, because
+     # cudaPackages would've refused to evaluate anyway.
+     badPlatforms = optionals useCuda lib.platforms.darwin;
+
+     # Configurations that are known to result in build failures. Can be
+     # overridden by importing Nixpkgs with `allowBroken = true`.
+     broken = (useMetalKit && !effectiveStdenv.isDarwin);
+
+     description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
+     homepage = "https://github.com/ggml-org/llama.cpp/";
+     license = lib.licenses.mit;
+
+     # Accommodates `nix run` and `lib.getExe`
+     mainProgram = "llama-cli";
+
+     # These people might respond, on a best-effort basis, if you ping them
+     # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
+     # Consider adding yourself to this list if you want to ensure this flake
+     # stays maintained and you're willing to invest your time. Do not add
+     # other people without their consent. Consider removing people after
+     # they've been unreachable for long periods of time.
+
+     # Note that lib.maintainers is defined in Nixpkgs, but you may just add
+     # an attrset following the same format as in
+     # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
+     maintainers = with lib.maintainers; [
+       philiptaron
+       SomeoneSerge
+     ];
+
+     # Extend `badPlatforms` instead
+     platforms = lib.platforms.all;
+   };
+ })
.devops/nix/python-scripts.nix ADDED
@@ -0,0 +1,66 @@
+ {
+   lib,
+   stdenv,
+   buildPythonPackage,
+   poetry-core,
+   mkShell,
+   python3Packages,
+   gguf-py,
+ }@inputs:
+
+ let
+   llama-python-deps = with python3Packages; [
+     numpy
+     sentencepiece
+     transformers
+     protobuf
+     torchWithoutCuda
+     gguf-py
+     tqdm
+
+     # for scripts/compare-llama-bench.py
+     gitpython
+     tabulate
+
+     # for examples/pydantic-models-to-grammar-examples.py
+     docstring-parser
+     pydantic
+   ];
+
+   llama-python-test-deps = with python3Packages; [
+     # Server bench
+     matplotlib
+
+     # server tests
+     openai
+     pytest
+     prometheus-client
+   ];
+ in
+
+ buildPythonPackage ({
+   pname = "llama-scripts";
+   version = "0.0.0";
+   pyproject = true;
+
+   # NOTE: The files filtered out here are not visible in the build sandbox, nor
+   # do they affect the output hash. They can be modified without triggering a rebuild.
+   src = lib.cleanSourceWith {
+     filter =
+       name: type:
+       let
+         any = builtins.any (x: x);
+         baseName = builtins.baseNameOf name;
+       in
+       any [
+         (lib.hasSuffix ".py" name)
+         (baseName == "README.md")
+         (baseName == "pyproject.toml")
+       ];
+     src = lib.cleanSource ../../.;
+   };
+   nativeBuildInputs = [ poetry-core ];
+   nativeCheckInputs = llama-python-test-deps;
+   dependencies = llama-python-deps;
+ })
.devops/nix/scope.nix ADDED
@@ -0,0 +1,41 @@
+ {
+   lib,
+   newScope,
+   python3,
+   llamaVersion ? "0.0.0",
+ }:
+
+ let
+   pythonPackages = python3.pkgs;
+   buildPythonPackage = pythonPackages.buildPythonPackage;
+   numpy = pythonPackages.numpy;
+   tqdm = pythonPackages.tqdm;
+   sentencepiece = pythonPackages.sentencepiece;
+   pyyaml = pythonPackages.pyyaml;
+   poetry-core = pythonPackages.poetry-core;
+   pytestCheckHook = pythonPackages.pytestCheckHook;
+ in
+
+ # We're using `makeScope` instead of just writing out an attrset
+ # because it allows users to apply overlays later using `overrideScope'`.
+ # Cf. https://noogle.dev/f/lib/makeScope
+
+ lib.makeScope newScope (self: {
+   inherit llamaVersion;
+   gguf-py = self.callPackage ./package-gguf-py.nix {
+     inherit
+       buildPythonPackage
+       numpy
+       tqdm
+       sentencepiece
+       poetry-core
+       pyyaml
+       pytestCheckHook
+       ;
+   };
+   python-scripts = self.callPackage ./python-scripts.nix { inherit buildPythonPackage poetry-core; };
+   llama-cpp = self.callPackage ./package.nix { };
+   docker = self.callPackage ./docker.nix { };
+   docker-min = self.callPackage ./docker.nix { interactive = false; };
+   sif = self.callPackage ./sif.nix { };
+ })
.devops/nix/sif.nix ADDED
@@ -0,0 +1,27 @@
+ {
+   lib,
+   singularity-tools,
+   llama-cpp,
+   bashInteractive,
+   interactive ? false,
+ }:
+
+ let
+   optionalInt = cond: x: if cond then x else 0;
+ in
+ singularity-tools.buildImage rec {
+   inherit (llama-cpp) name;
+   contents = [ llama-cpp ] ++ lib.optionals interactive [ bashInteractive ];
+
+   # These are excessive (but safe) for most variants. Building singularity
+   # images requires superuser privileges, so we build them inside a VM in a
+   # writable image of pre-determined size.
+   #
+   # ROCm is currently affected by https://github.com/NixOS/nixpkgs/issues/276846
+   #
+   # Expected image sizes:
+   # - cpu/blas: 150M,
+   # - cuda, all gencodes: 560M,
+   diskSize = 4096 + optionalInt llama-cpp.useRocm 16384;
+   memSize = diskSize;
+ }
.devops/rocm.Dockerfile ADDED
@@ -0,0 +1,113 @@
+ ARG UBUNTU_VERSION=24.04
+
+ # This needs to generally match the container host's environment.
+ ARG ROCM_VERSION=6.3
+ ARG AMDGPU_VERSION=6.3
+
+ # Target the ROCm build image
+ ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
+
+ ### Build image
+ FROM ${BASE_ROCM_DEV_CONTAINER} AS build
+
+ # Unless otherwise specified, we make a fat build.
+ # List from https://github.com/ggml-org/llama.cpp/pull/1087#issuecomment-1682807878
+ # This is mostly tied to rocBLAS supported archs.
+ # gfx803, gfx900, gfx1032, gfx1101, gfx1102 are not officially supported;
+ # gfx906 is deprecated.
+ # Check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html
+
+ ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102'
+ #ARG ROCM_DOCKER_ARCH=gfx1100
+
+ # Set ROCm GPU architectures
+ ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
+ # Enable ROCm
+ # ENV CC=/opt/rocm/llvm/bin/clang
+ # ENV CXX=/opt/rocm/llvm/bin/clang++
+
+ RUN apt-get update \
+     && apt-get install -y \
+     build-essential \
+     cmake \
+     git \
+     libcurl4-openssl-dev \
+     curl \
+     libgomp1
+
+ WORKDIR /app
+
+ COPY . .
+
+ RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
+     cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DCMAKE_BUILD_TYPE=Release -DLLAMA_BUILD_TESTS=OFF \
+     && cmake --build build --config Release -j$(nproc)
+
+ RUN mkdir -p /app/lib \
+     && find build -name "*.so" -exec cp {} /app/lib \;
+
+ RUN mkdir -p /app/full \
+     && cp build/bin/* /app/full \
+     && cp *.py /app/full \
+     && cp -r gguf-py /app/full \
+     && cp -r requirements /app/full \
+     && cp requirements.txt /app/full \
+     && cp .devops/tools.sh /app/full/tools.sh
+
+ ## Base image
+ FROM ${BASE_ROCM_DEV_CONTAINER} AS base
+
+ RUN apt-get update \
+     && apt-get install -y libgomp1 curl \
+     && apt autoremove -y \
+     && apt clean -y \
+     && rm -rf /tmp/* /var/tmp/* \
+     && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
+     && find /var/cache -type f -delete
+
+ COPY --from=build /app/lib/ /app
+
+ ### Full
+ FROM base AS full
+
+ COPY --from=build /app/full /app
+
+ WORKDIR /app
+
+ RUN apt-get update \
+     && apt-get install -y \
+     git \
+     python3-pip \
+     python3 \
+     python3-wheel \
+     && pip install --break-system-packages --upgrade setuptools \
+     && pip install --break-system-packages -r requirements.txt \
+     && apt autoremove -y \
+     && apt clean -y \
+     && rm -rf /tmp/* /var/tmp/* \
+     && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
+     && find /var/cache -type f -delete
+
+ ENTRYPOINT ["/app/tools.sh"]
+
+ ### Light, CLI only
+ FROM base AS light
+
+ COPY --from=build /app/full/llama-cli /app
+
+ WORKDIR /app
+
+ ENTRYPOINT [ "/app/llama-cli" ]
+
+ ### Server, Server only
+ FROM base AS server
+
+ ENV LLAMA_ARG_HOST=0.0.0.0
+
+ COPY --from=build /app/full/llama-server /app
+
+ WORKDIR /app
+
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
+
+ ENTRYPOINT [ "/app/llama-server" ]
.devops/tools.sh ADDED
@@ -0,0 +1,49 @@
+ #!/bin/bash
+ set -e
+
+ # Read the first argument into a variable
+ arg1="$1"
+
+ # Shift the arguments to remove the first one
+ shift
+
+ if [[ "$arg1" == '--convert' || "$arg1" == '-c' ]]; then
+     exec python3 ./convert_hf_to_gguf.py "$@"
+ elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
+     exec ./llama-quantize "$@"
+ elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
+     exec ./llama-cli "$@"
+ elif [[ "$arg1" == '--bench' || "$arg1" == '-b' ]]; then
+     exec ./llama-bench "$@"
+ elif [[ "$arg1" == '--perplexity' || "$arg1" == '-p' ]]; then
+     exec ./llama-perplexity "$@"
+ elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
+     echo "Converting PTH to GGML..."
+     for i in $(ls $1/$2/ggml-model-f16.bin*); do
+         if [ -f "${i/f16/q4_0}" ]; then
+             echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
+         else
+             echo "Converting PTH to GGML: $i into ${i/f16/q4_0}..."
+             # No `exec` here, so the loop can quantize every matching file
+             ./llama-quantize "$i" "${i/f16/q4_0}" q4_0
+         fi
+     done
+ elif [[ "$arg1" == '--server' || "$arg1" == '-s' ]]; then
+     exec ./llama-server "$@"
+ else
+     echo "Unknown command: $arg1"
+     echo "Available commands: "
+     echo "  --run (-r): Run a model previously converted into ggml"
+     echo "              ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -n 512"
+     echo "  --bench (-b): Benchmark the performance of the inference for various parameters."
+     echo "              ex: -m model.gguf"
+     echo "  --perplexity (-p): Measure the perplexity of a model over a given text."
+     echo "              ex: -m model.gguf -f file.txt"
+     echo "  --convert (-c): Convert a llama model into ggml"
+     echo "              ex: --outtype f16 \"/models/7B/\" "
+     echo "  --quantize (-q): Optimize a ggml model with the quantization process"
+     echo "              ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2"
+     echo "  --all-in-one (-a): Execute --convert & --quantize"
+     echo "              ex: \"/models/\" 7B"
+     echo "  --server (-s): Run a model on the server"
+     echo "              ex: -m /models/7B/ggml-model-q4_0.bin -c 2048 -ngl 43 -mg 1 --port 8080"
+ fi
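
Because tools.sh is the entrypoint of the full images, its subcommands map directly onto docker run arguments; a sketch, with the image tag and model paths illustrative:

    docker run -v "$PWD/models:/models" llama.cpp:full --run -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512
    docker run -v "$PWD/models:/models" llama.cpp:full --quantize /models/7B/ggml-model-f16.gguf /models/7B/ggml-model-q4_0.gguf q4_0
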
.devops/vulkan.Dockerfile ADDED
@@ -0,0 +1,89 @@
+ ARG UBUNTU_VERSION=24.04
+
+ FROM ubuntu:$UBUNTU_VERSION AS build
+
+ # Install build tools
+ RUN apt update && apt install -y git build-essential cmake wget
+
+ # Install Vulkan SDK and cURL
+ RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
+     wget -qO /etc/apt/sources.list.d/lunarg-vulkan-noble.list https://packages.lunarg.com/vulkan/lunarg-vulkan-noble.list && \
+     apt update -y && \
+     apt-get install -y vulkan-sdk libcurl4-openssl-dev curl
+
+ # Build it
+ WORKDIR /app
+
+ COPY . .
+
+ RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON && \
+     cmake --build build --config Release -j$(nproc)
+
+ RUN mkdir -p /app/lib && \
+     find build -name "*.so" -exec cp {} /app/lib \;
+
+ RUN mkdir -p /app/full \
+     && cp build/bin/* /app/full \
+     && cp *.py /app/full \
+     && cp -r gguf-py /app/full \
+     && cp -r requirements /app/full \
+     && cp requirements.txt /app/full \
+     && cp .devops/tools.sh /app/full/tools.sh
+
+ ## Base image
+ FROM ubuntu:$UBUNTU_VERSION AS base
+
+ RUN apt-get update \
+     && apt-get install -y libgomp1 curl libvulkan-dev \
+     && apt autoremove -y \
+     && apt clean -y \
+     && rm -rf /tmp/* /var/tmp/* \
+     && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
+     && find /var/cache -type f -delete
+
+ COPY --from=build /app/lib/ /app
+
+ ### Full
+ FROM base AS full
+
+ COPY --from=build /app/full /app
+
+ WORKDIR /app
+
+ RUN apt-get update \
+     && apt-get install -y \
+     git \
+     python3 \
+     python3-pip \
+     python3-wheel \
+     && pip install --break-system-packages --upgrade setuptools \
+     && pip install --break-system-packages -r requirements.txt \
+     && apt autoremove -y \
+     && apt clean -y \
+     && rm -rf /tmp/* /var/tmp/* \
+     && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
+     && find /var/cache -type f -delete
+
+ ENTRYPOINT ["/app/tools.sh"]
+
+ ### Light, CLI only
+ FROM base AS light
+
+ COPY --from=build /app/full/llama-cli /app
+
+ WORKDIR /app
+
+ ENTRYPOINT [ "/app/llama-cli" ]
+
+ ### Server, Server only
+ FROM base AS server
+
+ ENV LLAMA_ARG_HOST=0.0.0.0
+
+ COPY --from=build /app/full/llama-server /app
+
+ WORKDIR /app
+
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
+
+ ENTRYPOINT [ "/app/llama-server" ]
.dockerignore ADDED
@@ -0,0 +1,20 @@
+ *.o
+ *.a
+ .cache/
+ # Do not ignore .git directory, otherwise the reported build number will always be 0
+ .github/
+ .gitignore
+ .vs/
+ .vscode/
+ .DS_Store
+
+ build*/
+
+ models/*
+
+ /llama-cli
+ /llama-quantize
+
+ arm_neon.h
+ compile_commands.json
+ Dockerfile
.ecrc ADDED
@@ -0,0 +1,6 @@
+ {
+     "Exclude": ["^\\.gitmodules$", "stb_image\\.h"],
+     "Disable": {
+         "IndentSize": true
+     }
+ }
.editorconfig ADDED
@@ -0,0 +1,54 @@
+ # https://EditorConfig.org
+
+ # Top-most EditorConfig file
+ root = true
+
+ # Unix-style newlines with a newline ending every file, utf-8 charset
+ [*]
+ end_of_line = lf
+ insert_final_newline = true
+ trim_trailing_whitespace = true
+ charset = utf-8
+ indent_style = space
+ indent_size = 4
+
+ [Makefile]
+ indent_style = tab
+
+ [scripts/*.mk]
+ indent_style = tab
+
+ [prompts/*.txt]
+ insert_final_newline = unset
+
+ [tools/server/public/*]
+ indent_size = 2
+
+ [tools/server/public/deps_*]
+ trim_trailing_whitespace = unset
+ indent_style = unset
+ indent_size = unset
+
+ [tools/server/deps_*]
+ trim_trailing_whitespace = unset
+ indent_style = unset
+ indent_size = unset
+
+ [examples/llama.swiftui/llama.swiftui.xcodeproj/*]
+ indent_style = tab
+
+ [tools/cvector-generator/*.txt]
+ trim_trailing_whitespace = unset
+ insert_final_newline = unset
+
+ [models/templates/*.jinja]
+ indent_style = unset
+ indent_size = unset
+ end_of_line = unset
+ charset = unset
+ trim_trailing_whitespace = unset
+ insert_final_newline = unset
+
+ [vendor/miniaudio/miniaudio.h]
+ trim_trailing_whitespace = unset
+ insert_final_newline = unset
.flake8 ADDED
@@ -0,0 +1,18 @@
+ [flake8]
+ max-line-length = 125
+ ignore = E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503
+ exclude =
+     # Do not traverse examples and tools
+     examples,
+     tools,
+     # Do not include package initializers
+     __init__.py,
+     # No need to traverse our git directory
+     .git,
+     # There's no value in checking cache directories
+     __pycache__,
+     # No need to include the build path
+     build,
+     # This contains builds that we don't want to check
+     dist  # This is generated with `python build .` for package releases
+ # max-complexity = 10
.gitattributes CHANGED
@@ -33,3 +33,42 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ docs/development/llama-star/idea-arch.key filter=lfs diff=lfs merge=lfs -text
+ ggml/src/ggml-kompute/kompute/docs/images/android-editor.jpg filter=lfs diff=lfs merge=lfs -text
+ ggml/src/ggml-kompute/kompute/docs/images/binder-cpp.jpg filter=lfs diff=lfs merge=lfs -text
+ ggml/src/ggml-kompute/kompute/docs/images/binder-python.jpg filter=lfs diff=lfs merge=lfs -text
+ ggml/src/ggml-kompute/kompute/docs/images/kompute-cpp-video.png filter=lfs diff=lfs merge=lfs -text
+ ggml/src/ggml-kompute/kompute/docs/images/kompute-python-video.png filter=lfs diff=lfs merge=lfs -text
+ ggml/src/ggml-kompute/kompute/docs/images/kompute-vulkan-architecture.jpg filter=lfs diff=lfs merge=lfs -text
+ ggml/src/ggml-kompute/kompute/docs/images/komputer-2.gif filter=lfs diff=lfs merge=lfs -text
+ ggml/src/ggml-kompute/kompute/docs/images/komputer-godot-4.gif filter=lfs diff=lfs merge=lfs -text
+ ggml/src/ggml-kompute/kompute/docs/images/komputer-logos.gif filter=lfs diff=lfs merge=lfs -text
+ ggml/src/ggml-kompute/kompute/docs/images/logistic-regression.jpg filter=lfs diff=lfs merge=lfs -text
+ ggml/src/ggml-kompute/kompute/docs/images/queue-allocation.jpg filter=lfs diff=lfs merge=lfs -text
+ ggml/src/ggml-kompute/kompute/examples/android/android-simple/app/src/main/assets/komputer-2.gif filter=lfs diff=lfs merge=lfs -text
+ ggml/src/ggml-kompute/kompute/examples/godot_logistic_regression/godot_resources/assets/roboto.ttf filter=lfs diff=lfs merge=lfs -text
+ media/llama0-banner.png filter=lfs diff=lfs merge=lfs -text
+ media/llama0-logo.png filter=lfs diff=lfs merge=lfs -text
+ media/matmul.png filter=lfs diff=lfs merge=lfs -text
+ models/ggml-vocab-aquila.gguf filter=lfs diff=lfs merge=lfs -text
+ models/ggml-vocab-baichuan.gguf filter=lfs diff=lfs merge=lfs -text
+ models/ggml-vocab-bert-bge.gguf filter=lfs diff=lfs merge=lfs -text
+ models/ggml-vocab-command-r.gguf filter=lfs diff=lfs merge=lfs -text
+ models/ggml-vocab-deepseek-coder.gguf filter=lfs diff=lfs merge=lfs -text
+ models/ggml-vocab-deepseek-llm.gguf filter=lfs diff=lfs merge=lfs -text
+ models/ggml-vocab-falcon.gguf filter=lfs diff=lfs merge=lfs -text
+ models/ggml-vocab-gpt-2.gguf filter=lfs diff=lfs merge=lfs -text
+ models/ggml-vocab-gpt-neox.gguf filter=lfs diff=lfs merge=lfs -text
+ models/ggml-vocab-llama-bpe.gguf filter=lfs diff=lfs merge=lfs -text
+ models/ggml-vocab-llama-spm.gguf filter=lfs diff=lfs merge=lfs -text
+ models/ggml-vocab-mpt.gguf filter=lfs diff=lfs merge=lfs -text
+ models/ggml-vocab-nomic-bert-moe.gguf filter=lfs diff=lfs merge=lfs -text
+ models/ggml-vocab-phi-3.gguf filter=lfs diff=lfs merge=lfs -text
+ models/ggml-vocab-qwen2.gguf filter=lfs diff=lfs merge=lfs -text
+ models/ggml-vocab-refact.gguf filter=lfs diff=lfs merge=lfs -text
+ models/ggml-vocab-starcoder.gguf filter=lfs diff=lfs merge=lfs -text
+ tools/mtmd/test-1.jpeg filter=lfs diff=lfs merge=lfs -text
+ tools/mtmd/test-2.mp3 filter=lfs diff=lfs merge=lfs -text
+ tools/server/themes/buttons-top/buttons_top.png filter=lfs diff=lfs merge=lfs -text
+ tools/server/themes/wild/llamapattern.png filter=lfs diff=lfs merge=lfs -text
+ tools/server/themes/wild/wild.png filter=lfs diff=lfs merge=lfs -text
.github/ISSUE_TEMPLATE/010-bug-compilation.yml ADDED
@@ -0,0 +1,87 @@
+ name: Bug (compilation)
+ description: Something goes wrong when trying to compile llama.cpp.
+ title: "Compile bug: "
+ labels: ["bug-unconfirmed", "compilation"]
+ body:
+   - type: markdown
+     attributes:
+       value: >
+         Thanks for taking the time to fill out this bug report!
+         This issue template is intended for bug reports where the compilation of llama.cpp fails.
+         Before opening an issue, please confirm that the compilation still fails with `-DGGML_CCACHE=OFF`.
+         If the compilation succeeds with ccache disabled you should be able to permanently fix the issue
+         by clearing `~/.cache/ccache` (on Linux).
+   - type: textarea
+     id: commit
+     attributes:
+       label: Git commit
+       description: Which commit are you trying to compile?
+       placeholder: |
+         $git rev-parse HEAD
+         84a07a17b1b08cf2b9747c633a2372782848a27f
+     validations:
+       required: true
+   - type: dropdown
+     id: operating-system
+     attributes:
+       label: Operating systems
+       description: Which operating systems do you know to be affected?
+       multiple: true
+       options:
+         - Linux
+         - Mac
+         - Windows
+         - BSD
+         - Other? (Please let us know in description)
+     validations:
+       required: true
+   - type: dropdown
+     id: backends
+     attributes:
+       label: GGML backends
+       description: Which GGML backends do you know to be affected?
+       options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan]
+       multiple: true
+     validations:
+       required: true
+   - type: textarea
+     id: info
+     attributes:
+       label: Problem description & steps to reproduce
+       description: >
+         Please give us a summary of the problem and tell us how to reproduce it.
+         If you can narrow down the bug to specific compile flags, that information would be very much appreciated by us.
+       placeholder: >
+         I'm trying to compile llama.cpp with CUDA support on a fresh install of Ubuntu and get error XY.
+         Here are the exact commands that I used: ...
+     validations:
+       required: true
+   - type: textarea
+     id: first_bad_commit
+     attributes:
+       label: First Bad Commit
+       description: >
+         If the bug was not present on an earlier version: when did it start appearing?
+         If possible, please do a git bisect and identify the exact commit that introduced the bug.
+     validations:
+       required: false
+   - type: textarea
+     id: command
+     attributes:
+       label: Compile command
+       description: >
+         Please provide the exact command you used to compile llama.cpp. For example: `cmake -B ...`.
+         This will be automatically formatted into code, so no need for backticks.
+       render: shell
+     validations:
+       required: true
+   - type: textarea
+     id: logs
+     attributes:
+       label: Relevant log output
+       description: >
+         Please copy and paste any relevant log output, including any generated text.
+         This will be automatically formatted into code, so no need for backticks.
+       render: shell
+     validations:
+       required: true
.github/ISSUE_TEMPLATE/011-bug-results.yml ADDED
@@ -0,0 +1,101 @@
+ name: Bug (model use)
+ description: Something goes wrong when using a model (in general, not specific to a single llama.cpp module).
+ title: "Eval bug: "
+ labels: ["bug-unconfirmed", "model evaluation"]
+ body:
+   - type: markdown
+     attributes:
+       value: >
+         Thanks for taking the time to fill out this bug report!
+         This issue template is intended for bug reports where the model evaluation results
+         (i.e. the generated text) are incorrect or llama.cpp crashes during model evaluation.
+         If you encountered the issue while using an external UI (e.g. ollama),
+         please reproduce your issue using one of the examples/binaries in this repository.
+         The `llama-cli` binary can be used for simple and reproducible model inference.
+   - type: textarea
+     id: version
+     attributes:
+       label: Name and Version
+       description: Which version of our software are you running? (use `--version` to get a version string)
+       placeholder: |
+         $./llama-cli --version
+         version: 2999 (42b4109e)
+         built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
+     validations:
+       required: true
+   - type: dropdown
+     id: operating-system
+     attributes:
+       label: Operating systems
+       description: Which operating systems do you know to be affected?
+       multiple: true
+       options:
+         - Linux
+         - Mac
+         - Windows
+         - BSD
+         - Other? (Please let us know in description)
+     validations:
+       required: true
+   - type: dropdown
+     id: backends
+     attributes:
+       label: GGML backends
+       description: Which GGML backends do you know to be affected?
+       options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan]
+       multiple: true
+     validations:
+       required: true
+   - type: textarea
+     id: hardware
+     attributes:
+       label: Hardware
+       description: Which CPUs/GPUs are you using?
+       placeholder: >
+         e.g. Ryzen 5950X + 2x RTX 4090
+     validations:
+       required: true
+   - type: textarea
+     id: model
+     attributes:
+       label: Models
+       description: >
+         Which model(s) at which quantization were you using when encountering the bug?
+         If you downloaded a GGUF file off of Huggingface, please provide a link.
+       placeholder: >
+         e.g. Meta LLaMA 3.1 Instruct 8b q4_K_M
+     validations:
+       required: false
+   - type: textarea
+     id: info
+     attributes:
+       label: Problem description & steps to reproduce
+       description: >
+         Please give us a summary of the problem and tell us how to reproduce it.
+         If you can narrow down the bug to specific hardware, compile flags, or command line arguments,
+         that information would be very much appreciated by us.
+       placeholder: >
+         e.g. when I run llama-cli with -ngl 99 I get garbled outputs.
+         When I use -ngl 0 it works correctly.
+         Here are the exact commands that I used: ...
+     validations:
+       required: true
+   - type: textarea
+     id: first_bad_commit
+     attributes:
+       label: First Bad Commit
+       description: >
+         If the bug was not present on an earlier version: when did it start appearing?
+         If possible, please do a git bisect and identify the exact commit that introduced the bug.
+     validations:
+       required: false
+   - type: textarea
+     id: logs
+     attributes:
+       label: Relevant log output
+       description: >
+         Please copy and paste any relevant log output, including the command that you entered and any generated text.
+         This will be automatically formatted into code, so no need for backticks.
+       render: shell
+     validations:
+       required: true
.github/ISSUE_TEMPLATE/019-bug-misc.yml ADDED
@@ -0,0 +1,91 @@
+ name: Bug (misc.)
+ description: Something is not working the way it should (and it's not covered by any of the above cases).
+ title: "Misc. bug: "
+ labels: ["bug-unconfirmed"]
+ body:
+   - type: markdown
+     attributes:
+       value: >
+         Thanks for taking the time to fill out this bug report!
+         This issue template is intended for miscellaneous bugs that don't fit into any other category.
+         If you encountered the issue while using an external UI (e.g. ollama),
+         please reproduce your issue using one of the examples/binaries in this repository.
+   - type: textarea
+     id: version
+     attributes:
+       label: Name and Version
+       description: Which version of our software is affected? (You can use `--version` to get a version string.)
+       placeholder: |
+         $./llama-cli --version
+         version: 2999 (42b4109e)
+         built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
+     validations:
+       required: true
+   - type: dropdown
+     id: operating-system
+     attributes:
+       label: Operating systems
+       description: Which operating systems do you know to be affected?
+       multiple: true
+       options:
+         - Linux
+         - Mac
+         - Windows
+         - BSD
+         - Other? (Please let us know in description)
+     validations:
+       required: false
+   - type: dropdown
+     id: module
+     attributes:
+       label: Which llama.cpp modules do you know to be affected?
+       multiple: true
+       options:
+         - Documentation/Github
+         - libllama (core library)
+         - llama-cli
+         - llama-server
+         - llama-bench
+         - llama-quantize
+         - Python/Bash scripts
+         - Test code
+         - Other (Please specify in the next section)
+     validations:
+       required: false
+   - type: textarea
+     id: command
+     attributes:
+       label: Command line
+       description: >
+         Please provide the exact commands you entered, if applicable. For example: `llama-server -m ... -c ...`, `llama-cli -m ...`, etc.
+         This will be automatically formatted into code, so no need for backticks.
+       render: shell
+     validations:
+       required: false
+   - type: textarea
+     id: info
+     attributes:
+       label: Problem description & steps to reproduce
+       description: >
+         Please give us a summary of the problem and tell us how to reproduce it (if applicable).
+     validations:
+       required: true
+   - type: textarea
+     id: first_bad_commit
+     attributes:
+       label: First Bad Commit
+       description: >
+         If the bug was not present on an earlier version and it's not trivial to track down: when did it start appearing?
+         If possible, please do a git bisect and identify the exact commit that introduced the bug.
+     validations:
+       required: false
+   - type: textarea
+     id: logs
+     attributes:
+       label: Relevant log output
+       description: >
+         If applicable, please copy and paste any relevant log output, including any generated text.
+         This will be automatically formatted into code, so no need for backticks.
+       render: shell
+     validations:
+       required: false
.github/ISSUE_TEMPLATE/020-enhancement.yml ADDED
@@ -0,0 +1,51 @@
+ name: Enhancement
+ description: Used to request enhancements for llama.cpp.
+ title: "Feature Request: "
+ labels: ["enhancement"]
+ body:
+   - type: markdown
+     attributes:
+       value: |
+         [Please post your idea first in Discussion if there is not yet a consensus for this enhancement request. This will help to keep this issue tracker focused on enhancements that the community has agreed needs to be implemented.](https://github.com/ggml-org/llama.cpp/discussions/categories/ideas)
+
+   - type: checkboxes
+     id: prerequisites
+     attributes:
+       label: Prerequisites
+       description: Please confirm the following before submitting your enhancement request.
+       options:
+         - label: I am running the latest code. Mention the version if possible as well.
+           required: true
+         - label: I carefully followed the [README.md](https://github.com/ggml-org/llama.cpp/blob/master/README.md).
+           required: true
+         - label: I searched using keywords relevant to my issue to make sure that I am creating a new issue that is not already open (or closed).
+           required: true
+         - label: I reviewed the [Discussions](https://github.com/ggml-org/llama.cpp/discussions), and have a new and useful enhancement to share.
+           required: true
+
+   - type: textarea
+     id: feature-description
+     attributes:
+       label: Feature Description
+       description: Please provide a detailed written description of what you were trying to do, and what you expected `llama.cpp` to do as an enhancement.
+       placeholder: Detailed description of the enhancement
+     validations:
+       required: true
+
+   - type: textarea
+     id: motivation
+     attributes:
+       label: Motivation
+       description: Please provide a detailed written description of reasons why this feature is necessary and how it is useful to `llama.cpp` users.
+       placeholder: Explanation of why this feature is needed and its benefits
+     validations:
+       required: true
+
+   - type: textarea
+     id: possible-implementation
+     attributes:
+       label: Possible Implementation
+       description: If you have an idea as to how it can be implemented, please write a detailed description. Feel free to give links to external sources or share visuals that might be helpful to understand the details better.
+       placeholder: Detailed description of potential implementation
+     validations:
+       required: false
.github/ISSUE_TEMPLATE/030-research.yml ADDED
@@ -0,0 +1,52 @@
+ name: Research
+ description: Track new technical research area.
+ title: "Research: "
+ labels: ["research 🔬"]
+ body:
+   - type: markdown
+     attributes:
+       value: |
+         Don't forget to check for any [duplicate research issue tickets](https://github.com/ggml-org/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3A%22research+%F0%9F%94%AC%22)
+
+   - type: checkboxes
+     id: research-stage
+     attributes:
+       label: Research Stage
+       description: Track general state of this research ticket
+       options:
+         - label: Background Research (Let's try to avoid reinventing the wheel)
+         - label: Hypothesis Formed (How do you think this will work and it's effect?)
+         - label: Strategy / Implementation Forming
+         - label: Analysis of results
+         - label: Debrief / Documentation (So people in the future can learn from us)
+
+   - type: textarea
+     id: background
+     attributes:
+       label: Previous existing literature and research
+       description: Whats the current state of the art and whats the motivation for this research?
+
+   - type: textarea
+     id: hypothesis
+     attributes:
+       label: Hypothesis
+       description: How do you think this will work and it's effect?
+
+   - type: textarea
+     id: implementation
+     attributes:
+       label: Implementation
+       description: Got an approach? e.g. a PR ready to go?
+
+   - type: textarea
+     id: analysis
+     attributes:
+       label: Analysis
+       description: How does the proposed implementation behave?
+
+   - type: textarea
+     id: logs
+     attributes:
+       label: Relevant log output
+       description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
+       render: shell
.github/ISSUE_TEMPLATE/040-refactor.yml ADDED
@@ -0,0 +1,28 @@
+ name: Refactor (Maintainers)
+ description: Used to track refactoring opportunities.
+ title: "Refactor: "
+ labels: ["refactor"]
+ body:
+   - type: markdown
+     attributes:
+       value: |
+         Don't forget to [check for existing refactor issue tickets](https://github.com/ggml-org/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3Arefactoring) in case it's already covered.
+         Also you may want to check [Pull request refactor label as well](https://github.com/ggml-org/llama.cpp/pulls?q=is%3Aopen+is%3Apr+label%3Arefactoring) for duplicates too.
+
+   - type: textarea
+     id: background-description
+     attributes:
+       label: Background Description
+       description: Please provide a detailed written description of the pain points you are trying to solve.
+       placeholder: Detailed description behind your motivation to request refactor
+     validations:
+       required: true
+
+   - type: textarea
+     id: possible-approaches
+     attributes:
+       label: Possible Refactor Approaches
+       description: If you have some idea of possible approaches to solve this problem. You may want to make it a todo list.
+       placeholder: Your idea of possible refactoring opportunity/approaches
+     validations:
+       required: false
.github/ISSUE_TEMPLATE/config.yml ADDED
@@ -0,0 +1,11 @@
+ blank_issues_enabled: true
+ contact_links:
+   - name: Got an idea?
+     url: https://github.com/ggml-org/llama.cpp/discussions/categories/ideas
+     about: Pop it there. It may then become an enhancement ticket.
+   - name: Got a question?
+     url: https://github.com/ggml-org/llama.cpp/discussions/categories/q-a
+     about: Ask a question there!
+   - name: Want to contribute?
+     url: https://github.com/ggml-org/llama.cpp/wiki/contribute
+     about: Head to the contribution guide page of the wiki for areas you can help with
.github/actions/get-tag-name/action.yml ADDED
@@ -0,0 +1,22 @@
+ name: "Determine tag name"
+ description: "Determine the tag name to use for a release"
+ outputs:
+   name:
+     description: "The name of the tag"
+     value: ${{ steps.tag.outputs.name }}
+
+ runs:
+   using: "composite"
+   steps:
+     - name: Determine tag name
+       id: tag
+       shell: bash
+       run: |
+         BUILD_NUMBER="$(git rev-list --count HEAD)"
+         SHORT_HASH="$(git rev-parse --short=7 HEAD)"
+         if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
+           echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
+         else
+           SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
+           echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
+         fi
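The tag naming above can be reproduced outside CI with a small bash sketch, assuming BRANCH_NAME is exported the same way the calling workflow sets it:

    # mirrors the composite action's logic; BRANCH_NAME is assumed to be set
    BUILD_NUMBER="$(git rev-list --count HEAD)"
    SHORT_HASH="$(git rev-parse --short=7 HEAD)"
    if [[ "$BRANCH_NAME" == "master" ]]; then
        echo "b${BUILD_NUMBER}"
    else
        echo "$(echo "$BRANCH_NAME" | tr '/' '-')-b${BUILD_NUMBER}-${SHORT_HASH}"
    fi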
.github/actions/windows-setup-cuda/action.yml ADDED
@@ -0,0 +1,67 @@
+ name: "Windows - Setup CUDA Toolkit"
+ description: "Setup CUDA Toolkit for Windows"
+ inputs:
+   cuda_version:
+     description: "CUDA toolkit version"
+     required: true
+
+ runs:
+   using: "composite"
+   steps:
+     - name: Install Cuda Toolkit 11.7
+       if: ${{ inputs.cuda_version == '11.7' }}
+       shell: pwsh
+       run: |
+         mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
+         choco install unzip -y
+         curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-11.7.99-archive.zip"
+         curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-11.7.99-archive.zip"
+         curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-11.7.99-archive.zip"
+         curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-11.7.4.6-archive.zip"
+         curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-11.7.91-archive.zip"
+         curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-11.7.91-archive.zip"
+         curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-11.7.101-archive.zip"
+         curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-11.7.91-archive.zip"
+         unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
+         xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cudart-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+         xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvcc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+         xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvrtc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+         xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libcublas-windows-x86_64-11.7.4.6-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+         xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvtx-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+         xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\visual_studio_integration-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+         xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvprof-windows-x86_64-11.7.101-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+         xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cccl-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+         echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+         echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+         echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
+         echo "CUDA_PATH_V11_7=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
+
+     - name: Install Cuda Toolkit 12.4
+       if: ${{ inputs.cuda_version == '12.4' }}
+       shell: pwsh
+       run: |
+         mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
+         choco install unzip -y
+         curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-12.4.127-archive.zip"
+         curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-12.4.131-archive.zip"
+         curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-12.4.127-archive.zip"
+         curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-12.4.5.8-archive.zip"
+         curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-12.4.127-archive.zip"
+         curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-12.4.127-archive.zip"
+         curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-12.4.127-archive.zip"
+         curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-12.4.127-archive.zip"
+         curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-12.4.127-archive.zip"
+         unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
+         xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cudart-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+         xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvcc-windows-x86_64-12.4.131-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+         xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvrtc-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+         xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libcublas-windows-x86_64-12.4.5.8-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+         xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvtx-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+         xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_profiler_api-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+         xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\visual_studio_integration-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+         xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvprof-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+         xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cccl-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+         echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+         echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+         echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
+         echo "CUDA_PATH_V12_4=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
.github/actions/windows-setup-curl/action.yml ADDED
@@ -0,0 +1,30 @@
+ name: 'Windows - Setup CURL'
+ description: 'Composite action, to be reused in other workflow'
+ inputs:
+   curl_version:
+     description: 'CURL version'
+     required: false
+     default: '8.6.0_6'
+   architecture:
+     description: 'Architecture of the libcurl to download'
+     required: false
+     default: 'win64'
+ outputs:
+   curl_path:
+     description: "Path to the downloaded libcurl"
+     value: ${{ steps.get_libcurl.outputs.curl_path }}
+
+ runs:
+   using: "composite"
+   steps:
+     - name: libCURL
+       id: get_libcurl
+       shell: powershell
+       env:
+         CURL_VERSION: ${{ inputs.curl_version }}
+         ARCHITECTURE: ${{ inputs.architecture }}
+       run: |
+         curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-${env:ARCHITECTURE}-mingw.zip"
+         mkdir $env:RUNNER_TEMP/libcurl
+         tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl
+         echo "curl_path=$env:RUNNER_TEMP/libcurl" >> $env:GITHUB_OUTPUT
.github/labeler.yml ADDED
@@ -0,0 +1,95 @@
+ # https://github.com/actions/labeler
+ Kompute:
+   - changed-files:
+       - any-glob-to-any-file:
+           - ggml/include/ggml-kompute.h
+           - ggml/src/ggml-kompute/**
+           - README-kompute.md
+ Apple Metal:
+   - changed-files:
+       - any-glob-to-any-file:
+           - ggml/include/ggml-metal.h
+           - ggml/src/ggml-metal/**
+           - README-metal.md
+ SYCL:
+   - changed-files:
+       - any-glob-to-any-file:
+           - ggml/include/ggml-sycl.h
+           - ggml/src/ggml-sycl/**
+           - docs/backend/SYCL.md
+           - examples/sycl/**
+ Nvidia GPU:
+   - changed-files:
+       - any-glob-to-any-file:
+           - ggml/include/ggml-cuda.h
+           - ggml/src/ggml-cuda/**
+ Vulkan:
+   - changed-files:
+       - any-glob-to-any-file:
+           - ggml/include/ggml-vulkan.h
+           - ggml/src/ggml-vulkan/**
+ documentation:
+   - changed-files:
+       - any-glob-to-any-file:
+           - docs/**
+           - media/**
+ testing:
+   - changed-files:
+       - any-glob-to-any-file:
+           - tests/**
+ build:
+   - changed-files:
+       - any-glob-to-any-file:
+           - cmake/**
+           - CMakeLists.txt
+           - CMakePresets.json
+ examples:
+   - changed-files:
+       - any-glob-to-any-file:
+           - examples/**
+           - tools/**
+ devops:
+   - changed-files:
+       - any-glob-to-any-file:
+           - .devops/**
+           - .github/**
+           - ci/**
+ python:
+   - changed-files:
+       - any-glob-to-any-file:
+           - "**/*.py"
+           - requirements/**
+           - gguf-py/**
+           - .flake8
+ script:
+   - changed-files:
+       - any-glob-to-any-file:
+           - scripts/**
+ android:
+   - changed-files:
+       - any-glob-to-any-file:
+           - examples/llama.android/**
+ server:
+   - changed-files:
+       - any-glob-to-any-file:
+           - tools/server/**
+ ggml:
+   - changed-files:
+       - any-glob-to-any-file:
+           - ggml/**
+ nix:
+   - changed-files:
+       - any-glob-to-any-file:
+           - "**/*.nix"
+           - .github/workflows/nix-*.yml
+           - .devops/nix/nixpkgs-instances.nix
+ embedding:
+   - changed-files:
+       - any-glob-to-any-file: examples/embedding/
+
+ Ascend NPU:
+   - changed-files:
+       - any-glob-to-any-file:
+           - ggml/include/ggml-cann.h
+           - ggml/src/ggml-cann/**
+           - docs/backend/CANN.md
.github/pull_request_template.md ADDED
@@ -0,0 +1 @@
+ *Make sure to read the [contributing guidelines](https://github.com/ggml-org/llama.cpp/blob/master/CONTRIBUTING.md) before submitting a PR*
.github/workflows/bench.yml.disabled ADDED
@@ -0,0 +1,304 @@
+ # TODO: there have been some issues with the workflow, so disabling for now
+ # https://github.com/ggml-org/llama.cpp/issues/7893
+ #
+ # Benchmark
+ name: Benchmark
+
+ on:
+   workflow_dispatch:
+     inputs:
+       gpu-series:
+         description: 'Azure GPU series to run with'
+         required: true
+         type: choice
+         options:
+           - Standard_NC4as_T4_v3
+           - Standard_NC24ads_A100_v4
+           - Standard_NC80adis_H100_v5
+       sha:
+         description: 'Commit SHA1 to build'
+         required: false
+         type: string
+       duration:
+         description: 'Duration of the bench'
+         type: string
+         default: 10m
+
+   push:
+     branches:
+       - master
+     paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'tools/server/*.h*', 'tools/server/*.cpp']
+   pull_request_target:
+     types: [opened, synchronize, reopened]
+     paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'tools/server/*.h*', 'tools/server/*.cpp']
+   schedule:
+     - cron: '04 2 * * *'
+
+ concurrency:
+   group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}-${{ github.event.inputs.sha }}
+   cancel-in-progress: true
+
+ jobs:
+   bench-server-baseline:
+     runs-on: Standard_NC4as_T4_v3
+     env:
+       RUNNER_LABEL: Standard_NC4as_T4_v3 # FIXME Do not find a way to not duplicate it
+       N_USERS: 8
+       DURATION: 10m
+
+     strategy:
+       matrix:
+         model: [phi-2]
+         ftype: [q4_0, q8_0, f16]
+         include:
+           - model: phi-2
+             ftype: q4_0
+             pr_comment_enabled: "true"
+
+     if: |
+       inputs.gpu-series == 'Standard_NC4as_T4_v3'
+       || github.event_name == 'pull_request_target'
+     steps:
+       - name: Clone
+         id: checkout
+         uses: actions/checkout@v4
+         with:
+           fetch-depth: 0
+           ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
+
+       - name: Install python env
+         id: pipenv
+         run: |
+           cd tools/server/bench
+           python3 -m venv venv
+           source venv/bin/activate
+           pip install -r requirements.txt
+
+       - name: Prometheus
+         id: install_prometheus
+         run: |
+           wget --quiet https://github.com/prometheus/prometheus/releases/download/v2.51.0/prometheus-2.51.0.linux-amd64.tar.gz
+           tar xzf prometheus*.tar.gz --strip-components=1
+           ./prometheus --config.file=tools/server/bench/prometheus.yml &
+           while ! nc -z localhost 9090; do
+             sleep 0.1
+           done
+
+       - name: Set up Go
+         uses: actions/setup-go@v5
+         with:
+           go-version: '1.21'
+
+       - name: Install k6 and xk6-sse
+         id: k6_installation
+         run: |
+           cd tools/server/bench
+           go install go.k6.io/xk6/cmd/xk6@latest
+           xk6 build master \
+             --with github.com/phymbert/xk6-sse
+
+       - name: Build
+         id: cmake_build
+         run: |
+           set -eux
+           cmake -B build \
+             -DGGML_NATIVE=OFF \
+             -DLLAMA_BUILD_SERVER=ON \
+             -DLLAMA_CUBLAS=ON \
+             -DCUDAToolkit_ROOT=/usr/local/cuda \
+             -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
+             -DCMAKE_CUDA_ARCHITECTURES=75 \
+             -DLLAMA_FATAL_WARNINGS=OFF \
+             -DLLAMA_ALL_WARNINGS=OFF \
+             -DCMAKE_BUILD_TYPE=Release;
+           cmake --build build --config Release -j $(nproc) --target llama-server
+
+       - name: Download the dataset
+         id: download_dataset
+         run: |
+           cd tools/server/bench
+           wget --quiet https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
+
+       - name: Server bench
+         id: server_bench
+         env:
+           HEAD_REF: ${{ github.head_ref || github.ref_name }}
+         run: |
+           set -eux
+
+           cd tools/server/bench
+           source venv/bin/activate
+           python bench.py \
+             --runner-label ${{ env.RUNNER_LABEL }} \
+             --name ${{ github.job }} \
+             --branch $HEAD_REF \
+             --commit ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha }} \
+             --scenario script.js \
+             --duration ${{ github.event.inputs.duration || env.DURATION }} \
+             --hf-repo ggml-org/models \
+             --hf-file ${{ matrix.model }}/ggml-model-${{ matrix.ftype }}.gguf \
+             --model-path-prefix /models \
+             --parallel ${{ env.N_USERS }} \
+             -ngl 33 \
+             --batch-size 2048 \
+             --ubatch-size 256 \
+             --ctx-size 16384 \
+             --n-prompts 1000 \
+             --max-prompt-tokens 1024 \
+             --max-tokens 2048
+
+           cat results.github.env >> $GITHUB_ENV
+
+           # Remove dataset as we do not want it in the artefact
+           rm ShareGPT_V3_unfiltered_cleaned_split.json
+
+       - uses: actions/upload-artifact@v4
+         with:
+           name: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
+           compression-level: 9
+           path: |
+             tools/server/bench/*.jpg
+             tools/server/bench/*.json
+             tools/server/bench/*.log
+
+       - name: Commit status
+         uses: Sibz/github-status-action@v1
+         with:
+           authToken: ${{secrets.GITHUB_TOKEN}}
+           sha: ${{ inputs.sha || github.event.pull_request.head.sha || github.sha }}
+           context: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
+           description: |
+             ${{ env.BENCH_RESULTS }}
+           state: 'success'
+
+       - name: Upload benchmark images
+         uses: devicons/public-upload-to-imgur@v2.2.2
+         continue-on-error: true # Important as it looks unstable: 503
+         id: imgur_step
+         with:
+           client_id: ${{secrets.IMGUR_CLIENT_ID}}
+           path: |
+             tools/server/bench/prompt_tokens_seconds.jpg
+             tools/server/bench/predicted_tokens_seconds.jpg
+             tools/server/bench/kv_cache_usage_ratio.jpg
+             tools/server/bench/requests_processing.jpg
+
+       - name: Extract mermaid
+         id: set_mermaid
+         run: |
+           set -eux
+
+           cd tools/server/bench
+           PROMPT_TOKENS_SECONDS=$(cat prompt_tokens_seconds.mermaid)
+           echo "PROMPT_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
+           echo "$PROMPT_TOKENS_SECONDS" >> $GITHUB_ENV
+           echo "EOF" >> $GITHUB_ENV
+
+           PREDICTED_TOKENS_SECONDS=$(cat predicted_tokens_seconds.mermaid)
+           echo "PREDICTED_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
+           echo "$PREDICTED_TOKENS_SECONDS" >> $GITHUB_ENV
+           echo "EOF" >> $GITHUB_ENV
+
+           KV_CACHE_USAGE_RATIO=$(cat kv_cache_usage_ratio.mermaid)
+           echo "KV_CACHE_USAGE_RATIO<<EOF" >> $GITHUB_ENV
+           echo "$KV_CACHE_USAGE_RATIO" >> $GITHUB_ENV
+           echo "EOF" >> $GITHUB_ENV
+
+           REQUESTS_PROCESSING=$(cat requests_processing.mermaid)
+           echo "REQUESTS_PROCESSING<<EOF" >> $GITHUB_ENV
+           echo "$REQUESTS_PROCESSING" >> $GITHUB_ENV
+           echo "EOF" >> $GITHUB_ENV
+
+       - name: Extract image url
+         id: extract_image_url
+         continue-on-error: true
+         run: |
+           set -eux
+
+           echo "IMAGE_O=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" >> $GITHUB_ENV
+           echo "IMAGE_1=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" >> $GITHUB_ENV
+           echo "IMAGE_2=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" >> $GITHUB_ENV
+           echo "IMAGE_3=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" >> $GITHUB_ENV
+
+       - name: Comment PR
+         uses: mshick/add-pr-comment@v2
+         id: comment_pr
+         if: ${{ github.event.pull_request != '' && matrix.pr_comment_enabled == 'true' }}
+         with:
+           message-id: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
+           message: |
+             <p align="center">
+
+             📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_ for `${{ matrix.model }}`-`${{ matrix.ftype }}`: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
+
+             </p>
+
+             <details>
+
+             <summary>Expand details for performance related PR only</summary>
+
+             - Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }}
+             - HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(95)=${{ env.HTTP_REQ_DURATION_P_95_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
+             - Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_P_95_ }}tk/s
+             - Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_TOKENS_SECOND_P_95_ }}tk/s
+             - ${{ env.BENCH_GRAPH_XLABEL }}
+
+
+             <p align="center">
+
+             <img width="100%" height="100%" src="${{ env.IMAGE_O }}" alt="prompt_tokens_seconds" />
+
+             <details>
+
+             <summary>More</summary>
+
+             ```mermaid
+             ${{ env.PROMPT_TOKENS_SECONDS }}
+             ```
+
+             </details>
+
+             <img width="100%" height="100%" src="${{ env.IMAGE_1 }}" alt="predicted_tokens_seconds"/>
+
+             <details>
+             <summary>More</summary>
+
+             ```mermaid
+             ${{ env.PREDICTED_TOKENS_SECONDS }}
+             ```
+
+             </details>
+
+             </p>
+
+             <details>
+
+             <summary>Details</summary>
+
+             <p align="center">
+
+             <img width="100%" height="100%" src="${{ env.IMAGE_2 }}" alt="kv_cache_usage_ratio" />
+
+             <details>
+             <summary>More</summary>
+
+             ```mermaid
+             ${{ env.KV_CACHE_USAGE_RATIO }}
+             ```
+
+             </details>
+
+             <img width="100%" height="100%" src="${{ env.IMAGE_3 }}" alt="requests_processing"/>
+
+             <details>
+             <summary>More</summary>
+
+             ```mermaid
+             ${{ env.REQUESTS_PROCESSING }}
+             ```
+
+             </details>
+
+             </p>
+             </details>
+             </details>
.github/workflows/build-linux-cross.yml ADDED
@@ -0,0 +1,346 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Build on Linux using cross-compiler
2
+ on:
3
+ workflow_dispatch:
4
+ workflow_call:
5
+
6
+ jobs:
7
+ ubuntu-24-riscv64-cpu-cross:
8
+ runs-on: ubuntu-24.04
9
+
10
+ steps:
11
+ - uses: actions/checkout@v4
12
+ - name: Setup Riscv
13
+ run: |
14
+ sudo dpkg --add-architecture riscv64
15
+
16
+ # Add arch-specific repositories for non-amd64 architectures
17
+ cat << EOF | sudo tee /etc/apt/sources.list.d/riscv64-ports.list
18
+ deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
19
+ deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
20
+ deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
21
+ deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
22
+ EOF
23
+
24
+ sudo apt-get update || true ;# Prevent failure due to missing URLs.
25
+
26
+ sudo apt-get install -y --no-install-recommends \
27
+ build-essential \
28
+ gcc-14-riscv64-linux-gnu \
29
+ g++-14-riscv64-linux-gnu
30
+
31
+ - name: Build
32
+ run: |
33
+ cmake -B build -DLLAMA_CURL=OFF \
34
+ -DCMAKE_BUILD_TYPE=Release \
35
+ -DGGML_OPENMP=OFF \
36
+ -DLLAMA_BUILD_EXAMPLES=ON \
37
+ -DLLAMA_BUILD_TOOLS=ON \
38
+ -DLLAMA_BUILD_TESTS=OFF \
39
+ -DCMAKE_SYSTEM_NAME=Linux \
40
+ -DCMAKE_SYSTEM_PROCESSOR=riscv64 \
41
+ -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
42
+ -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \
43
+ -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
44
+ -DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \
45
+ -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
46
+ -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
47
+ -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
48
+
49
+ cmake --build build --config Release -j $(nproc)
50
+
51
+ ubuntu-24-riscv64-vulkan-cross:
52
+ runs-on: ubuntu-24.04
53
+
54
+ steps:
55
+ - uses: actions/checkout@v4
56
+ - name: Setup Riscv
57
+ run: |
58
+ sudo dpkg --add-architecture riscv64
59
+
60
+ # Add arch-specific repositories for non-amd64 architectures
61
+ cat << EOF | sudo tee /etc/apt/sources.list.d/riscv64-ports.list
62
+ deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
63
+ deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
64
+ deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
65
+ deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
66
+ EOF
67
+
68
+ sudo apt-get update || true ;# Prevent failure due to missing URLs.
69
+
70
+ sudo apt-get install -y --no-install-recommends \
71
+ build-essential \
72
+ glslc \
73
+ gcc-14-riscv64-linux-gnu \
74
+ g++-14-riscv64-linux-gnu \
75
+ libvulkan-dev:riscv64
76
+
77
+ - name: Build
78
+ run: |
79
+ cmake -B build -DLLAMA_CURL=OFF \
80
+ -DCMAKE_BUILD_TYPE=Release \
81
+ -DGGML_VULKAN=ON \
82
+ -DGGML_OPENMP=OFF \
83
+ -DLLAMA_BUILD_EXAMPLES=ON \
84
+ -DLLAMA_BUILD_TOOLS=ON \
85
+ -DLLAMA_BUILD_TESTS=OFF \
86
+ -DCMAKE_SYSTEM_NAME=Linux \
87
+ -DCMAKE_SYSTEM_PROCESSOR=riscv64 \
88
+ -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
89
+ -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \
90
+ -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
91
+ -DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \
92
+ -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
93
+ -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
94
+ -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
95
+
96
+ cmake --build build --config Release -j $(nproc)
97
+
98
+ ubuntu-24-arm64-vulkan-cross:
99
+ runs-on: ubuntu-24.04
100
+
101
+ steps:
102
+ - uses: actions/checkout@v4
103
+ - name: Setup Arm64
104
+ run: |
105
+ sudo dpkg --add-architecture arm64
106
+
107
+ # Add arch-specific repositories for non-amd64 architectures
108
+ cat << EOF | sudo tee /etc/apt/sources.list.d/arm64-ports.list
109
+ deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
110
+ deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
111
+ deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
112
+ deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
113
+ EOF
114
+
115
+ sudo apt-get update || true ;# Prevent failure due to missing URLs.
116
+
117
+ sudo apt-get install -y --no-install-recommends \
118
+ build-essential \
119
+ glslc \
120
+ crossbuild-essential-arm64 \
121
+ libvulkan-dev:arm64
122
+
123
+ - name: Build
124
+ run: |
125
+ cmake -B build -DLLAMA_CURL=OFF \
126
+ -DCMAKE_BUILD_TYPE=Release \
127
+ -DGGML_VULKAN=ON \
128
+ -DGGML_OPENMP=OFF \
129
+ -DLLAMA_BUILD_EXAMPLES=ON \
130
+ -DLLAMA_BUILD_TOOLS=ON \
131
+ -DLLAMA_BUILD_TESTS=OFF \
132
+ -DCMAKE_SYSTEM_NAME=Linux \
133
+ -DCMAKE_SYSTEM_PROCESSOR=aarch64 \
134
+ -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc \
135
+ -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++ \
136
+ -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
137
+ -DCMAKE_FIND_ROOT_PATH=/usr/lib/aarch64-linux-gnu \
138
+ -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
139
+ -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
140
+ -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
141
+
142
+ cmake --build build --config Release -j $(nproc)
143
+
144
+ ubuntu-24-ppc64el-cpu-cross:
145
+ runs-on: ubuntu-24.04
146
+
147
+ steps:
148
+ - uses: actions/checkout@v4
149
+ - name: Setup PowerPC64le
150
+ run: |
151
+ sudo dpkg --add-architecture ppc64el
152
+
153
+ # Add arch-specific repositories for non-amd64 architectures
154
+ cat << EOF | sudo tee /etc/apt/sources.list.d/ppc64el-ports.list
155
+ deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
156
+ deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
157
+ deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
158
+ deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
159
+ EOF
160
+
161
+ sudo apt-get update || true ;# Prevent failure due to missing URLs.
162
+
163
+ sudo apt-get install -y --no-install-recommends \
164
+ build-essential \
165
+ gcc-14-powerpc64le-linux-gnu \
166
+ g++-14-powerpc64le-linux-gnu
167
+
168
+ - name: Build
169
+ run: |
170
+ cmake -B build -DLLAMA_CURL=OFF \
171
+ -DCMAKE_BUILD_TYPE=Release \
172
+ -DGGML_OPENMP=OFF \
173
+ -DLLAMA_BUILD_EXAMPLES=ON \
174
+ -DLLAMA_BUILD_TOOLS=ON \
175
+ -DLLAMA_BUILD_TESTS=OFF \
176
+ -DCMAKE_SYSTEM_NAME=Linux \
177
+ -DCMAKE_SYSTEM_PROCESSOR=ppc64 \
178
+ -DCMAKE_C_COMPILER=powerpc64le-linux-gnu-gcc-14 \
179
+ -DCMAKE_CXX_COMPILER=powerpc64le-linux-gnu-g++-14 \
180
+ -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
181
+ -DCMAKE_FIND_ROOT_PATH=/usr/lib/powerpc64le-linux-gnu \
182
+ -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
183
+ -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
184
+ -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
185
+
186
+ cmake --build build --config Release -j $(nproc)
187
+
188
+ ubuntu-24-ppc64el-vulkan-cross:
189
+ runs-on: ubuntu-24.04
190
+
191
+ steps:
192
+ - uses: actions/checkout@v4
193
+ - name: Setup PowerPC64le
194
+ run: |
195
+ sudo dpkg --add-architecture ppc64el
196
+
197
+ # Add arch-specific repositories for non-amd64 architectures
198
+ cat << EOF | sudo tee /etc/apt/sources.list.d/ppc64el-ports.list
199
+ deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
200
+ deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
201
+ deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
202
+ deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
203
+ EOF
204
+
205
+ sudo apt-get update || true ;# Prevent failure due to missing URLs.
206
+
207
+ sudo apt-get install -y --no-install-recommends \
208
+ build-essential \
209
+ glslc \
210
+ gcc-14-powerpc64le-linux-gnu \
211
+ g++-14-powerpc64le-linux-gnu \
212
+ libvulkan-dev:ppc64el
213
+
214
+ - name: Build
215
+ run: |
216
+ cmake -B build -DLLAMA_CURL=OFF \
217
+ -DCMAKE_BUILD_TYPE=Release \
218
+ -DGGML_VULKAN=ON \
219
+ -DGGML_OPENMP=OFF \
220
+ -DLLAMA_BUILD_EXAMPLES=ON \
221
+ -DLLAMA_BUILD_TOOLS=ON \
222
+ -DLLAMA_BUILD_TESTS=OFF \
223
+ -DCMAKE_SYSTEM_NAME=Linux \
224
+ -DCMAKE_SYSTEM_PROCESSOR=ppc64 \
225
+ -DCMAKE_C_COMPILER=powerpc64le-linux-gnu-gcc-14 \
226
+ -DCMAKE_CXX_COMPILER=powerpc64le-linux-gnu-g++-14 \
227
+ -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
228
+ -DCMAKE_FIND_ROOT_PATH=/usr/lib/powerpc64le-linux-gnu \
229
+ -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
230
+ -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
231
+ -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
232
+
233
+ cmake --build build --config Release -j $(nproc)
234
+
235
+ debian-13-loongarch64-cpu-cross:
236
+ runs-on: ubuntu-24.04
237
+ container: debian@sha256:653dfb9f86c3782e8369d5f7d29bb8faba1f4bff9025db46e807fa4c22903671
238
+
239
+ steps:
240
+ - uses: actions/checkout@v4
241
+ - name: Setup LoongArch
242
+ run: |
243
+ rm -f /etc/apt/sources.list.d/*
244
+ cat << EOF | tee /etc/apt/sources.list.d/debian-ports.list
245
+ deb http://snapshot.debian.org/archive/debian/20250515T202920Z/ trixie main
246
+ EOF
247
+ ( echo 'quiet "true";'; \
248
+ echo 'APT::Get::Assume-Yes "true";'; \
249
+ echo 'APT::Install-Recommends "false";'; \
250
+ echo 'Acquire::Check-Valid-Until "false";'; \
251
+ echo 'Acquire::Retries "5";'; \
252
+ ) > /etc/apt/apt.conf.d/99snapshot-repos
253
+
254
+ apt-get update
255
+ apt-get install -y ca-certificates debian-ports-archive-keyring cmake git zip
256
+ dpkg --add-architecture loong64
257
+
258
+ # Add arch-specific repositories for non-amd64 architectures
259
+ cat << EOF | tee /etc/apt/sources.list.d/loong64-ports.list
260
+ deb [arch=loong64] http://snapshot.debian.org/archive/debian-ports/20250515T194251Z/ sid main
261
+ EOF
262
+
263
+ apt-get update || true ;# Prevent failure due to missing URLs.
264
+
265
+ apt-get install -y --no-install-recommends \
266
+ build-essential \
267
+ gcc-14-loongarch64-linux-gnu \
268
+ g++-14-loongarch64-linux-gnu
269
+
270
+ - name: Build
271
+ run: |
272
+ cmake -B build -DLLAMA_CURL=OFF \
273
+ -DCMAKE_BUILD_TYPE=Release \
274
+ -DGGML_OPENMP=OFF \
275
+ -DLLAMA_BUILD_EXAMPLES=ON \
276
+ -DLLAMA_BUILD_TOOLS=ON \
277
+ -DLLAMA_BUILD_TESTS=OFF \
278
+ -DCMAKE_SYSTEM_NAME=Linux \
279
+ -DCMAKE_SYSTEM_PROCESSOR=loongarch64 \
280
+ -DCMAKE_C_COMPILER=loongarch64-linux-gnu-gcc-14 \
281
+ -DCMAKE_CXX_COMPILER=loongarch64-linux-gnu-g++-14 \
282
+ -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
283
+ -DCMAKE_FIND_ROOT_PATH=/usr/lib/loongarch64-linux-gnu \
284
+ -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
285
+ -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
286
+ -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
287
+
288
+ cmake --build build --config Release -j $(nproc)
289
+
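Both LoongArch jobs pin apt to dated snapshot.debian.org archives, and the options written to 99snapshot-repos above exist to make that work; a short gloss (intent inferred from the settings themselves, not extra configuration):

    # Acquire::Check-Valid-Until "false"  -- snapshot Release files are frozen in time and
    #                                        their Valid-Until stamps have expired; without
    #                                        this, apt-get update against a snapshot fails
    # Acquire::Retries "5"                -- transient fetch failures from the snapshot
    #                                        mirrors are common, so retry a few times
    # Pinning to dated URLs keeps the loong64 cross toolchain reproducible even as the
    # live trixie/sid archives move on.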
290
+ debian-13-loongarch64-vulkan-cross:
291
+ runs-on: ubuntu-24.04
292
+ container: debian@sha256:653dfb9f86c3782e8369d5f7d29bb8faba1f4bff9025db46e807fa4c22903671
293
+
294
+ steps:
295
+ - uses: actions/checkout@v4
296
+ - name: Setup LoongArch
297
+ run: |
298
+ rm -f /etc/apt/sources.list.d/*
299
+ cat << EOF | tee /etc/apt/sources.list.d/debian-ports.list
300
+ deb http://snapshot.debian.org/archive/debian/20250515T202920Z/ trixie main
301
+ EOF
302
+ ( echo 'quiet "true";'; \
303
+ echo 'APT::Get::Assume-Yes "true";'; \
304
+ echo 'APT::Install-Recommends "false";'; \
305
+ echo 'Acquire::Check-Valid-Until "false";'; \
306
+ echo 'Acquire::Retries "5";'; \
307
+ ) > /etc/apt/apt.conf.d/99snapshot-repos
308
+
309
+ apt-get update
310
+ apt-get install -y ca-certificates debian-ports-archive-keyring cmake git zip
311
+ dpkg --add-architecture loong64
312
+
313
+ # Add arch-specific repositories for non-amd64 architectures
314
+ cat << EOF | tee /etc/apt/sources.list.d/loong64-ports.list
315
+ deb [arch=loong64] http://snapshot.debian.org/archive/debian-ports/20250515T194251Z/ sid main
316
+ EOF
317
+
318
+ apt-get update || true ;# Prevent failure due to missing URLs.
319
+
320
+ apt-get install -y --no-install-recommends \
321
+ build-essential \
322
+ glslc \
323
+ gcc-14-loongarch64-linux-gnu \
324
+ g++-14-loongarch64-linux-gnu \
325
+ libvulkan-dev:loong64
326
+
327
+ - name: Build
328
+ run: |
329
+ cmake -B build -DLLAMA_CURL=OFF \
330
+ -DCMAKE_BUILD_TYPE=Release \
331
+ -DGGML_VULKAN=ON \
332
+ -DGGML_OPENMP=OFF \
333
+ -DLLAMA_BUILD_EXAMPLES=ON \
334
+ -DLLAMA_BUILD_TOOLS=ON \
335
+ -DLLAMA_BUILD_TESTS=OFF \
336
+ -DCMAKE_SYSTEM_NAME=Linux \
337
+ -DCMAKE_SYSTEM_PROCESSOR=loongarch64 \
338
+ -DCMAKE_C_COMPILER=loongarch64-linux-gnu-gcc-14 \
339
+ -DCMAKE_CXX_COMPILER=loongarch64-linux-gnu-g++-14 \
340
+ -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
341
+ -DCMAKE_FIND_ROOT_PATH=/usr/lib/loongarch64-linux-gnu \
342
+ -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
343
+ -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
344
+ -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
345
+
346
+ cmake --build build --config Release -j $(nproc)
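Every cross job in this workflow repeats the same CMAKE_FIND_ROOT_PATH_* flags. As a sketch of what they mean, the inline -D options of the arm64 job are equivalent to writing a small toolchain file (file name hypothetical; the workflow passes the flags on the command line instead):

    # sketch: the same cross settings as the inline flags above, as a toolchain file
    cat > aarch64-linux-gnu.cmake << 'EOF'
    set(CMAKE_SYSTEM_NAME Linux)
    set(CMAKE_SYSTEM_PROCESSOR aarch64)
    set(CMAKE_C_COMPILER   aarch64-linux-gnu-gcc)
    set(CMAKE_CXX_COMPILER aarch64-linux-gnu-g++)
    set(CMAKE_FIND_ROOT_PATH /usr/lib/aarch64-linux-gnu)  # target sysroot to search
    set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)  # build-time tools come from the host
    set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)   # link only against target libraries
    set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE BOTH)   # headers: target first, then host
    EOF
    cmake -B build -DCMAKE_TOOLCHAIN_FILE=aarch64-linux-gnu.cmake -DCMAKE_BUILD_TYPE=Release ...

With such a file, each job's configure line would shrink to its build-type and backend flags.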
.github/workflows/build.yml ADDED
@@ -0,0 +1,1080 @@
1
+ name: CI
2
+
3
+ on:
4
+ workflow_dispatch: # allows manual triggering
5
+ push:
6
+ branches:
7
+ - master
8
+ paths: ['.github/workflows/build.yml', '.github/workflows/build-linux-cross.yml', '**/CMakeLists.txt', '**/.cmake', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp']
9
+ pull_request:
10
+ types: [opened, synchronize, reopened]
11
+ paths: ['.github/workflows/build.yml', '.github/workflows/build-linux-cross.yml', '**/CMakeLists.txt', '**/.cmake', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp']
12
+
13
+ concurrency:
14
+ group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
15
+ cancel-in-progress: true
16
+
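The concurrency group uses GitHub's short-circuiting && / || expression operators as a ternary; how it evaluates, with example values assumed:

    # group = ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
    # pull request:   head_ref="my-branch" (truthy) -> group "CI-refs/pull/123/merge",
    #                 so a newer push to the same PR cancels the in-flight run
    # push to master: head_ref=""          (falsy)  -> group "CI-<run_id>", unique per
    #                 run, so master builds are never cancelled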
17
+ env:
18
+ GGML_NLOOP: 3
19
+ GGML_N_THREADS: 1
20
+ LLAMA_LOG_COLORS: 1
21
+ LLAMA_LOG_PREFIX: 1
22
+ LLAMA_LOG_TIMESTAMPS: 1
23
+
24
+ jobs:
25
+ macOS-latest-cmake-arm64:
26
+ runs-on: macos-14
27
+
28
+ steps:
29
+ - name: Clone
30
+ id: checkout
31
+ uses: actions/checkout@v4
32
+
33
+ - name: ccache
34
+ uses: hendrikmuhs/ccache-action@v1.2.16
35
+ with:
36
+ key: macOS-latest-cmake-arm64
37
+ evict-old-files: 1d
38
+
39
+ - name: Dependencies
40
+ id: depends
41
+ continue-on-error: true
42
+ run: |
43
+ brew update
44
+ brew install curl
45
+
46
+ - name: Build
47
+ id: cmake_build
48
+ run: |
49
+ sysctl -a
50
+ cmake -B build \
51
+ -DCMAKE_BUILD_RPATH="@loader_path" \
52
+ -DLLAMA_FATAL_WARNINGS=ON \
53
+ -DGGML_METAL_USE_BF16=ON \
54
+ -DGGML_METAL_EMBED_LIBRARY=ON \
55
+ -DGGML_RPC=ON
56
+ cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
57
+
58
+ - name: Test
59
+ id: cmake_test
60
+ run: |
61
+ cd build
62
+ ctest -L 'main|curl' --verbose --timeout 900
63
+
64
+ macOS-latest-cmake-x64:
65
+ runs-on: macos-13
66
+
67
+ steps:
68
+ - name: Clone
69
+ id: checkout
70
+ uses: actions/checkout@v4
71
+
72
+ - name: ccache
73
+ uses: hendrikmuhs/ccache-action@v1.2.16
74
+ with:
75
+ key: macOS-latest-cmake-x64
76
+ evict-old-files: 1d
77
+
78
+ - name: Dependencies
79
+ id: depends
80
+ continue-on-error: true
81
+ run: |
82
+ brew update
83
+ brew install curl
84
+
85
+ - name: Build
86
+ id: cmake_build
87
+ run: |
88
+ sysctl -a
89
+ # Metal is disabled due to intermittent failures with Github runners not having a GPU:
90
+ # https://github.com/ggml-org/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
91
+ cmake -B build \
92
+ -DCMAKE_BUILD_RPATH="@loader_path" \
93
+ -DLLAMA_FATAL_WARNINGS=ON \
94
+ -DGGML_METAL=OFF \
95
+ -DGGML_RPC=ON
96
+ cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
97
+
98
+ - name: Test
99
+ id: cmake_test
100
+ run: |
101
+ cd build
102
+ ctest -L main --verbose --timeout 900
103
+
104
+ ubuntu-cpu-cmake:
105
+ strategy:
106
+ matrix:
107
+ include:
108
+ - build: 'x64'
109
+ os: ubuntu-22.04
110
+ - build: 'arm64'
111
+ os: ubuntu-22.04-arm
112
+
113
+ runs-on: ${{ matrix.os }}
114
+
115
+ steps:
116
+ - name: Clone
117
+ id: checkout
118
+ uses: actions/checkout@v4
119
+
120
+ - name: ccache
121
+ uses: hendrikmuhs/ccache-action@v1.2.16
122
+ with:
123
+ key: ubuntu-cpu-cmake
124
+ evict-old-files: 1d
125
+
126
+ - name: Dependencies
127
+ id: depends
128
+ run: |
129
+ sudo apt-get update
130
+ sudo apt-get install build-essential libcurl4-openssl-dev
131
+
132
+ - name: Build
133
+ id: cmake_build
134
+ run: |
135
+ cmake -B build \
136
+ -DLLAMA_FATAL_WARNINGS=ON \
137
+ -DGGML_RPC=ON
138
+ cmake --build build --config Release -j $(nproc)
139
+
140
+ - name: Test
141
+ id: cmake_test
142
+ run: |
143
+ cd build
144
+ ctest -L 'main|curl' --verbose --timeout 900
145
+
146
+ - name: Test llama2c conversion
147
+ id: llama2c_test
148
+ run: |
149
+ cd build
150
+ echo "Fetch tokenizer"
151
+ wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin
152
+ echo "Fetch llama2c model"
153
+ wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin
154
+ ./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf
155
+ ./bin/llama-cli -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256
156
+
157
+ ubuntu-latest-cmake-sanitizer:
158
+ runs-on: ubuntu-latest
159
+
160
+ continue-on-error: true
161
+
162
+ strategy:
163
+ matrix:
164
+ sanitizer: [ADDRESS, THREAD, UNDEFINED]
165
+ build_type: [Debug]
166
+
167
+ steps:
168
+ - name: Clone
169
+ id: checkout
170
+ uses: actions/checkout@v4
171
+
172
+ - name: ccache
173
+ uses: hendrikmuhs/ccache-action@v1.2.16
174
+ with:
175
+ key: ubuntu-latest-cmake-sanitizer-${{ matrix.sanitizer }}
176
+ evict-old-files: 1d
177
+
178
+ - name: Dependencies
179
+ id: depends
180
+ run: |
181
+ sudo apt-get update
182
+ sudo apt-get install build-essential libcurl4-openssl-dev
183
+
184
+ - name: Build
185
+ id: cmake_build
186
+ if: ${{ matrix.sanitizer != 'THREAD' }}
187
+ run: |
188
+ cmake -B build \
189
+ -DLLAMA_FATAL_WARNINGS=ON \
190
+ -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
191
+ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
192
+ cmake --build build --config ${{ matrix.build_type }} -j $(nproc)
193
+
194
+ - name: Build (no OpenMP)
195
+ id: cmake_build_no_openmp
196
+ if: ${{ matrix.sanitizer == 'THREAD' }}
197
+ run: |
198
+ cmake -B build \
199
+ -DLLAMA_FATAL_WARNINGS=ON \
200
+ -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
201
+ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
202
+ -DGGML_OPENMP=OFF
203
+ cmake --build build --config ${{ matrix.build_type }} -j $(nproc)
204
+
205
+ - name: Test
206
+ id: cmake_test
207
+ run: |
208
+ cd build
209
+ ctest -L main --verbose --timeout 900
210
+
211
+ ubuntu-latest-llguidance:
212
+ runs-on: ubuntu-latest
213
+
214
+ steps:
215
+ - name: Clone
216
+ id: checkout
217
+ uses: actions/checkout@v4
218
+
219
+ - name: Dependencies
220
+ id: depends
221
+ run: |
222
+ sudo apt-get update
223
+ sudo apt-get install build-essential libcurl4-openssl-dev
224
+
225
+ - name: Build
226
+ id: cmake_build
227
+ run: |
228
+ mkdir build
229
+ cd build
230
+ cmake .. \
231
+ -DLLAMA_FATAL_WARNINGS=ON \
232
+ -DLLAMA_LLGUIDANCE=ON
233
+ cmake --build . --config Release -j $(nproc)
234
+
235
+ - name: Test
236
+ id: cmake_test
237
+ run: |
238
+ cd build
239
+ ctest -L main --verbose --timeout 900
240
+
241
+ ubuntu-latest-cmake-rpc:
242
+ runs-on: ubuntu-latest
243
+
244
+ continue-on-error: true
245
+
246
+ steps:
247
+ - name: Clone
248
+ id: checkout
249
+ uses: actions/checkout@v4
250
+
251
+ - name: ccache
252
+ uses: hendrikmuhs/ccache-action@v1.2.16
253
+ with:
254
+ key: ubuntu-latest-cmake-rpc
255
+ evict-old-files: 1d
256
+
257
+ - name: Dependencies
258
+ id: depends
259
+ run: |
260
+ sudo apt-get update
261
+ sudo apt-get install build-essential libcurl4-openssl-dev
262
+
263
+ - name: Build
264
+ id: cmake_build
265
+ run: |
266
+ cmake -B build \
267
+ -DGGML_RPC=ON
268
+ cmake --build build --config Release -j $(nproc)
269
+
270
+ - name: Test
271
+ id: cmake_test
272
+ run: |
273
+ cd build
274
+ ctest -L main --verbose
275
+
276
+ ubuntu-22-cmake-vulkan:
277
+ runs-on: ubuntu-22.04
278
+
279
+ steps:
280
+ - name: Clone
281
+ id: checkout
282
+ uses: actions/checkout@v4
283
+
284
+ - name: ccache
285
+ uses: hendrikmuhs/ccache-action@v1.2.16
286
+ with:
287
+ key: ubuntu-22-cmake-vulkan
288
+ evict-old-files: 1d
289
+
290
+ - name: Dependencies
291
+ id: depends
292
+ run: |
293
+ wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
294
+ sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
295
+ sudo apt-get update -y
296
+ sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk libcurl4-openssl-dev
297
+
298
+ - name: Build
299
+ id: cmake_build
300
+ run: |
301
+ cmake -B build \
302
+ -DGGML_VULKAN=ON
303
+ cmake --build build --config Release -j $(nproc)
304
+
305
+ - name: Test
306
+ id: cmake_test
307
+ run: |
308
+ cd build
309
+ export GGML_VK_VISIBLE_DEVICES=0
310
+ # This is using llvmpipe and runs slower than other backends
311
+ ctest -L main --verbose --timeout 3600
312
+
313
+ ubuntu-22-cmake-hip:
314
+ runs-on: ubuntu-22.04
315
+ container: rocm/dev-ubuntu-22.04:6.0.2
316
+
317
+ steps:
318
+ - name: Clone
319
+ id: checkout
320
+ uses: actions/checkout@v4
321
+
322
+ - name: Dependencies
323
+ id: depends
324
+ run: |
325
+ sudo apt-get update
326
+ sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev libcurl4-openssl-dev
327
+
328
+ - name: ccache
329
+ uses: hendrikmuhs/ccache-action@v1.2.16
330
+ with:
331
+ key: ubuntu-22-cmake-hip
332
+ evict-old-files: 1d
333
+
334
+ - name: Build with native CMake HIP support
335
+ id: cmake_build
336
+ run: |
337
+ cmake -B build -S . \
338
+ -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \
339
+ -DGGML_HIP_ROCWMMA_FATTN=ON \
340
+ -DGGML_HIP=ON
341
+ cmake --build build --config Release -j $(nproc)
342
+
343
+ - name: Build with legacy HIP support
344
+ id: cmake_build_legacy_hip
345
+ run: |
346
+ cmake -B build2 -S . \
347
+ -DCMAKE_C_COMPILER=hipcc \
348
+ -DCMAKE_CXX_COMPILER=hipcc \
349
+ -DGGML_HIP_ROCWMMA_FATTN=ON \
350
+ -DGGML_HIP=ON
351
+ cmake --build build2 --config Release -j $(nproc)
352
+
353
+ ubuntu-22-cmake-musa:
354
+ runs-on: ubuntu-22.04
355
+ container: mthreads/musa:rc4.0.1-mudnn-devel-ubuntu22.04
356
+
357
+ steps:
358
+ - name: Clone
359
+ id: checkout
360
+ uses: actions/checkout@v4
361
+
362
+ - name: Dependencies
363
+ id: depends
364
+ run: |
365
+ apt-get update
366
+ apt-get install -y build-essential git cmake libcurl4-openssl-dev
367
+
368
+ - name: ccache
369
+ uses: hendrikmuhs/ccache-action@v1.2.16
370
+ with:
371
+ key: ubuntu-22-cmake-musa
372
+ evict-old-files: 1d
373
+
374
+ - name: Build with native CMake MUSA support
375
+ id: cmake_build
376
+ run: |
377
+ cmake -B build -S . \
378
+ -DGGML_MUSA=ON
379
+ cmake --build build --config Release -j $(nproc)
380
+
381
+ ubuntu-22-cmake-sycl:
382
+ runs-on: ubuntu-22.04
383
+
384
+ continue-on-error: true
385
+
386
+ steps:
387
+ - uses: actions/checkout@v4
388
+
389
+ - name: add oneAPI to apt
390
+ shell: bash
391
+ run: |
392
+ cd /tmp
393
+ wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
394
+ sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
395
+ rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
396
+ sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
397
+
398
+ - name: install oneAPI dpcpp compiler
399
+ shell: bash
400
+ run: |
401
+ sudo apt update
402
+ sudo apt install intel-oneapi-compiler-dpcpp-cpp libcurl4-openssl-dev
403
+
404
+ - name: install oneAPI MKL library
405
+ shell: bash
406
+ run: |
407
+ sudo apt install intel-oneapi-mkl-devel
408
+
409
+ - name: Clone
410
+ id: checkout
411
+ uses: actions/checkout@v4
412
+
413
+ - name: ccache
414
+ uses: hendrikmuhs/ccache-action@v1.2.16
415
+ with:
416
+ key: ubuntu-22-cmake-sycl
417
+ evict-old-files: 1d
418
+
419
+ - name: Build
420
+ id: cmake_build
421
+ run: |
422
+ source /opt/intel/oneapi/setvars.sh
423
+ cmake -B build \
424
+ -DGGML_SYCL=ON \
425
+ -DCMAKE_C_COMPILER=icx \
426
+ -DCMAKE_CXX_COMPILER=icpx
427
+ cmake --build build --config Release -j $(nproc)
428
+
429
+ ubuntu-22-cmake-sycl-fp16:
430
+ runs-on: ubuntu-22.04
431
+
432
+ continue-on-error: true
433
+
434
+ steps:
435
+ - uses: actions/checkout@v4
436
+
437
+ - name: add oneAPI to apt
438
+ shell: bash
439
+ run: |
440
+ cd /tmp
441
+ wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
442
+ sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
443
+ rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
444
+ sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
445
+
446
+ - name: install oneAPI dpcpp compiler
447
+ shell: bash
448
+ run: |
449
+ sudo apt update
450
+ sudo apt install intel-oneapi-compiler-dpcpp-cpp libcurl4-openssl-dev
451
+
452
+ - name: install oneAPI MKL library
453
+ shell: bash
454
+ run: |
455
+ sudo apt install intel-oneapi-mkl-devel
456
+
457
+ - name: Clone
458
+ id: checkout
459
+ uses: actions/checkout@v4
460
+
461
+ - name: ccache
462
+ uses: hendrikmuhs/ccache-action@v1.2.16
463
+ with:
464
+ key: ubuntu-22-cmake-sycl-fp16
465
+ evict-old-files: 1d
466
+
467
+ - name: Build
468
+ id: cmake_build
469
+ run: |
470
+ source /opt/intel/oneapi/setvars.sh
471
+ cmake -B build \
472
+ -DGGML_SYCL=ON \
473
+ -DCMAKE_C_COMPILER=icx \
474
+ -DCMAKE_CXX_COMPILER=icpx \
475
+ -DGGML_SYCL_F16=ON
476
+ cmake --build build --config Release -j $(nproc)
477
+
478
+ build-linux-cross:
479
+ uses: ./.github/workflows/build-linux-cross.yml
480
+
481
+ macOS-latest-cmake-ios:
482
+ runs-on: macos-latest
483
+
484
+ steps:
485
+ - name: Clone
486
+ id: checkout
487
+ uses: actions/checkout@v4
488
+
489
+ - name: ccache
490
+ uses: hendrikmuhs/ccache-action@v1.2.16
491
+ with:
492
+ key: macOS-latest-cmake-ios
493
+ evict-old-files: 1d
494
+
495
+ - name: Dependencies
496
+ id: depends
497
+ continue-on-error: true
498
+ run: |
499
+ brew update
500
+
501
+ - name: Build
502
+ id: cmake_build
503
+ run: |
504
+ sysctl -a
505
+ cmake -B build -G Xcode \
506
+ -DGGML_METAL_USE_BF16=ON \
507
+ -DGGML_METAL_EMBED_LIBRARY=ON \
508
+ -DLLAMA_BUILD_COMMON=OFF \
509
+ -DLLAMA_BUILD_EXAMPLES=OFF \
510
+ -DLLAMA_BUILD_TOOLS=OFF \
511
+ -DLLAMA_BUILD_TESTS=OFF \
512
+ -DLLAMA_BUILD_SERVER=OFF \
513
+ -DCMAKE_SYSTEM_NAME=iOS \
514
+ -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
515
+ -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
516
+ cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
517
+
518
+ macOS-latest-cmake-tvos:
519
+ runs-on: macos-latest
520
+
521
+ steps:
522
+ - name: Clone
523
+ id: checkout
524
+ uses: actions/checkout@v4
525
+
526
+ - name: ccache
527
+ uses: hendrikmuhs/ccache-action@v1.2.16
528
+ with:
529
+ key: macOS-latest-cmake-tvos
530
+ evict-old-files: 1d
531
+
532
+ - name: Dependencies
533
+ id: depends
534
+ continue-on-error: true
535
+ run: |
536
+ brew update
537
+
538
+ - name: Build
539
+ id: cmake_build
540
+ run: |
541
+ sysctl -a
542
+ cmake -B build -G Xcode \
543
+ -DGGML_METAL_USE_BF16=ON \
544
+ -DGGML_METAL_EMBED_LIBRARY=ON \
545
+ -DLLAMA_BUILD_COMMON=OFF \
546
+ -DLLAMA_BUILD_EXAMPLES=OFF \
547
+ -DLLAMA_BUILD_TOOLS=OFF \
548
+ -DLLAMA_BUILD_TESTS=OFF \
549
+ -DLLAMA_BUILD_SERVER=OFF \
550
+ -DCMAKE_SYSTEM_NAME=tvOS \
551
+ -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
552
+ -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
553
+ cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
554
+
555
+ macOS-latest-cmake-visionos:
556
+ runs-on: macos-latest
557
+
558
+ steps:
559
+ - name: Clone
560
+ id: checkout
561
+ uses: actions/checkout@v4
562
+
563
+ - name: Dependencies
564
+ id: depends
565
+ continue-on-error: true
566
+ run: |
567
+ brew update
568
+
569
+ - name: Build
570
+ id: cmake_build
571
+ run: |
572
+ sysctl -a
573
+ cmake -B build -G Xcode \
574
+ -DGGML_METAL_USE_BF16=ON \
575
+ -DGGML_METAL_EMBED_LIBRARY=ON \
576
+ -DLLAMA_BUILD_COMMON=OFF \
577
+ -DLLAMA_BUILD_EXAMPLES=OFF \
578
+ -DLLAMA_BUILD_TOOLS=OFF \
579
+ -DLLAMA_BUILD_TESTS=OFF \
580
+ -DLLAMA_BUILD_SERVER=OFF \
581
+ -DCMAKE_SYSTEM_NAME=visionOS \
582
+ -DCMAKE_OSX_DEPLOYMENT_TARGET=1.0 \
583
+ -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
584
+ cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
585
+
586
+ macOS-latest-swift:
587
+ runs-on: macos-latest
588
+
589
+ strategy:
590
+ matrix:
591
+ destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS']
592
+
593
+ steps:
594
+ - name: Clone
595
+ id: checkout
596
+ uses: actions/checkout@v4
597
+
598
+ - name: ccache
599
+ uses: hendrikmuhs/ccache-action@v1.2.16
600
+ with:
601
+ key: macOS-latest-swift
602
+ evict-old-files: 1d
603
+
604
+ - name: Dependencies
605
+ id: depends
606
+ continue-on-error: true
607
+ run: |
608
+ brew update
609
+
610
+ - name: Build llama.cpp with CMake
611
+ id: cmake_build
612
+ run: |
613
+ sysctl -a
614
+ cmake -B build -G Xcode \
615
+ -DGGML_METAL_USE_BF16=ON \
616
+ -DGGML_METAL_EMBED_LIBRARY=ON \
617
+ -DLLAMA_CURL=OFF \
618
+ -DLLAMA_BUILD_EXAMPLES=OFF \
619
+ -DLLAMA_BUILD_TOOLS=OFF \
620
+ -DLLAMA_BUILD_TESTS=OFF \
621
+ -DLLAMA_BUILD_SERVER=OFF \
622
+ -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"
623
+ cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
624
+
625
+ - name: xcodebuild for swift package
626
+ id: xcodebuild
627
+ run: |
628
+ ./build-xcframework.sh
629
+
630
+ windows-msys2:
631
+ runs-on: windows-latest
632
+
633
+ strategy:
634
+ fail-fast: false
635
+ matrix:
636
+ include:
637
+ - { sys: UCRT64, env: ucrt-x86_64, build: Release }
638
+ - { sys: CLANG64, env: clang-x86_64, build: Release }
639
+
640
+ steps:
641
+ - name: Clone
642
+ uses: actions/checkout@v4
643
+
644
+ - name: ccache
645
+ uses: hendrikmuhs/ccache-action@v1.2.16
646
+ with:
647
+ key: windows-msys2
648
+ variant: ccache
649
+ evict-old-files: 1d
650
+
651
+ - name: Setup ${{ matrix.sys }}
652
+ uses: msys2/setup-msys2@v2
653
+ with:
654
+ update: true
655
+ msystem: ${{matrix.sys}}
656
+ install: >-
657
+ base-devel
658
+ git
659
+ mingw-w64-${{matrix.env}}-toolchain
660
+ mingw-w64-${{matrix.env}}-cmake
661
+ mingw-w64-${{matrix.env}}-openblas
662
+
663
+ - name: Build using CMake
664
+ shell: msys2 {0}
665
+ run: |
666
+ cmake -B build
667
+ cmake --build build --config ${{ matrix.build }} -j $(nproc)
668
+
669
+ - name: Clean after building using CMake
670
+ shell: msys2 {0}
671
+ run: |
672
+ rm -rf build
673
+
674
+ - name: Build using CMake w/ OpenBLAS
675
+ shell: msys2 {0}
676
+ run: |
677
+ cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
678
+ cmake --build build --config ${{ matrix.build }} -j $(nproc)
679
+
680
+ windows-latest-cmake:
681
+ runs-on: windows-latest
682
+
683
+ env:
684
+ OPENBLAS_VERSION: 0.3.23
685
+ SDE_VERSION: 9.33.0-2024-01-07
686
+ VULKAN_VERSION: 1.4.309.0
687
+
688
+ strategy:
689
+ matrix:
690
+ include:
691
+ - build: 'cpu-x64 (static)'
692
+ defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF'
693
+ - build: 'openblas-x64'
694
+ defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
695
+ - build: 'vulkan-x64'
696
+ defines: '-DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_VULKAN=ON'
697
+ - build: 'llvm-arm64'
698
+ defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
699
+ - build: 'llvm-arm64-opencl-adreno'
700
+ defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON'
701
+ # - build: 'kompute-x64'
702
+ # defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON'
703
+
704
+ steps:
705
+ - name: Clone
706
+ id: checkout
707
+ uses: actions/checkout@v4
708
+
709
+ - name: ccache
710
+ uses: hendrikmuhs/ccache-action@v1.2.16
711
+ with:
712
+ key: windows-latest-cmake-${{ matrix.build }}
713
+ variant: ccache
714
+ evict-old-files: 1d
715
+
716
+ - name: Clone Kompute submodule
717
+ id: clone_kompute
718
+ if: ${{ matrix.build == 'kompute-x64' }}
719
+ run: |
720
+ git submodule update --init ggml/src/ggml-kompute/kompute
721
+
722
+ - name: Download OpenBLAS
723
+ id: get_openblas
724
+ if: ${{ matrix.build == 'openblas-x64' }}
725
+ run: |
726
+ curl.exe -o $env:RUNNER_TEMP/openblas.zip -L "https://github.com/xianyi/OpenBLAS/releases/download/v${env:OPENBLAS_VERSION}/OpenBLAS-${env:OPENBLAS_VERSION}-x64.zip"
727
+ curl.exe -o $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt -L "https://github.com/xianyi/OpenBLAS/raw/v${env:OPENBLAS_VERSION}/LICENSE"
728
+ mkdir $env:RUNNER_TEMP/openblas
729
+ tar.exe -xvf $env:RUNNER_TEMP/openblas.zip -C $env:RUNNER_TEMP/openblas
730
+ $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
731
+ $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
732
+ $lib = $(join-path $msvc 'bin\Hostx64\x64\lib.exe')
733
+ & $lib /machine:x64 "/def:${env:RUNNER_TEMP}/openblas/lib/libopenblas.def" "/out:${env:RUNNER_TEMP}/openblas/lib/openblas.lib" /name:openblas.dll
734
+
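The lib.exe call above exists because the prebuilt OpenBLAS zip ships only the DLL plus a .def export list, while linking with MSVC needs an import library; in effect:

    # input:  lib/libopenblas.def  -- exported-symbol list from the OpenBLAS release zip
    # output: lib/openblas.lib     -- MSVC import library that resolves to openblas.dll
    #         (/name:openblas.dll matches the rename done when the DLL is copied later)

The 'openblas-x64' matrix entry then links against that openblas.lib via -DBLAS_LIBRARIES, and the matching DLL lands next to the binaries in the "Add libopenblas.dll" step further down.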
735
+ - name: Install Vulkan SDK
736
+ id: get_vulkan
737
+ if: ${{ matrix.build == 'kompute-x64' || matrix.build == 'vulkan-x64' }}
738
+ run: |
739
+ curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe"
740
+ & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
741
+ Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
742
+ Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"
743
+
744
+ - name: Install Ninja
745
+ id: install_ninja
746
+ run: |
747
+ choco install ninja
748
+
749
+ - name: Install OpenCL Headers and Libs
750
+ id: install_opencl
751
+ if: ${{ matrix.build == 'llvm-arm64-opencl-adreno' }}
752
+ run: |
753
+ git clone https://github.com/KhronosGroup/OpenCL-Headers
754
+ cd OpenCL-Headers
755
+ cmake -B build `
756
+ -DBUILD_TESTING=OFF `
757
+ -DOPENCL_HEADERS_BUILD_TESTING=OFF `
758
+ -DOPENCL_HEADERS_BUILD_CXX_TESTS=OFF `
759
+ -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release"
760
+ cmake --build build --target install
761
+ git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader
762
+ cd OpenCL-ICD-Loader
763
+ cmake -B build-arm64-release `
764
+ -A arm64 `
765
+ -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" `
766
+ -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release"
767
+ cmake --build build-arm64-release --target install --config release
768
+
769
+ - name: libCURL
770
+ id: get_libcurl
771
+ uses: ./.github/actions/windows-setup-curl
772
+
773
+ - name: Build
774
+ id: cmake_build
775
+ env:
776
+ CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
777
+ run: |
778
+ cmake -S . -B build ${{ matrix.defines }} `
779
+ -DCURL_LIBRARY="$env:CURL_PATH/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:CURL_PATH/include"
780
+ cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS}
781
+ cp $env:CURL_PATH/bin/libcurl-*.dll build/bin/Release
782
+
783
+ - name: Add libopenblas.dll
784
+ id: add_libopenblas_dll
785
+ if: ${{ matrix.build == 'openblas-x64' }}
786
+ run: |
787
+ cp $env:RUNNER_TEMP/openblas/bin/libopenblas.dll ./build/bin/Release/openblas.dll
788
+ cp $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt ./build/bin/Release/OpenBLAS-${env:OPENBLAS_VERSION}.txt
789
+
790
+ - name: Test
791
+ id: cmake_test
792
+ if: ${{ matrix.build != 'llvm-arm64' && matrix.build != 'llvm-arm64-opencl-adreno' }}
793
+ run: |
794
+ cd build
795
+ ctest -L main -C Release --verbose --timeout 900
796
+
797
+ # TODO: disabled for now, consider adding tests for all CPU variants instead
798
+ # - name: Test (Intel SDE)
799
+ # id: cmake_test_sde
800
+ # if: ${{ matrix.build == 'avx512-x64' && env.HAS_AVX512F == '0' }} # use Intel SDE for AVX-512 emulation
801
+ # run: |
802
+ # curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/813591/sde-external-${env:SDE_VERSION}-win.tar.xz"
803
+ # # for some weird reason windows tar doesn't like sde tar.xz
804
+ # 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar.xz
805
+ # 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar
806
+ # $sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe)
807
+ # cd build
808
+ # $env:LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR = 1
809
+ # & $sde -future -- ctest -L main -C Release --verbose --timeout 900
810
+
811
+ ubuntu-latest-cmake-cuda:
812
+ runs-on: ubuntu-latest
813
+ container: nvidia/cuda:12.6.2-devel-ubuntu24.04
814
+
815
+ steps:
816
+ - name: Clone
817
+ id: checkout
818
+ uses: actions/checkout@v4
819
+
820
+ - name: Install dependencies
821
+ env:
822
+ DEBIAN_FRONTEND: noninteractive
823
+ run: |
824
+ apt update
825
+ apt install -y cmake build-essential ninja-build libgomp1 git libcurl4-openssl-dev
826
+
827
+ - name: ccache
828
+ uses: hendrikmuhs/ccache-action@v1.2.16
829
+ with:
830
+ key: ubuntu-latest-cmake-cuda
831
+ evict-old-files: 1d
832
+
833
+ - name: Build with CMake
834
+ run: |
835
+ cmake -S . -B build -G Ninja \
836
+ -DCMAKE_BUILD_TYPE=Release \
837
+ -DCMAKE_CUDA_ARCHITECTURES=89-real \
838
+ -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined \
839
+ -DLLAMA_FATAL_WARNINGS=ON \
840
+ -DGGML_NATIVE=OFF \
841
+ -DGGML_CUDA=ON
842
+ cmake --build build
843
+
844
+ windows-2022-cmake-cuda:
845
+ runs-on: windows-2022
846
+
847
+ strategy:
848
+ matrix:
849
+ cuda: ['12.4']
850
+
851
+ steps:
852
+ - name: Clone
853
+ id: checkout
854
+ uses: actions/checkout@v4
855
+
856
+ - name: Install ccache
857
+ uses: hendrikmuhs/ccache-action@v1.2.16
858
+ with:
859
+ key: windows-cuda-${{ matrix.cuda }}
860
+ variant: ccache
861
+ evict-old-files: 1d
862
+
863
+ - name: Install Cuda Toolkit
864
+ uses: ./.github/actions/windows-setup-cuda
865
+ with:
866
+ cuda_version: ${{ matrix.cuda }}
867
+
868
+ - name: Install Ninja
869
+ id: install_ninja
870
+ run: |
871
+ choco install ninja
872
+
873
+ - name: libCURL
874
+ id: get_libcurl
875
+ uses: ./.github/actions/windows-setup-curl
876
+
877
+ - name: Build
878
+ id: cmake_build
879
+ shell: cmd
880
+ env:
881
+ CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
882
+ run: |
883
+ call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
884
+ cmake -S . -B build -G "Ninja Multi-Config" ^
885
+ -DLLAMA_BUILD_SERVER=ON ^
886
+ -DGGML_NATIVE=OFF ^
887
+ -DGGML_BACKEND_DL=ON ^
888
+ -DGGML_CPU_ALL_VARIANTS=ON ^
889
+ -DGGML_CUDA=ON ^
890
+ -DGGML_RPC=ON ^
891
+ -DCURL_LIBRARY="%CURL_PATH%/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="%CURL_PATH%/include"
892
+ set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
893
+ cmake --build build --config Release -j %NINJA_JOBS% -t ggml
894
+ cmake --build build --config Release
895
+
896
+ windows-latest-cmake-sycl:
897
+ runs-on: windows-latest
898
+
899
+ defaults:
900
+ run:
901
+ shell: bash
902
+
903
+ env:
904
+ WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/7cd9bba0-7aab-4e30-b3ae-2221006a4a05/intel-oneapi-base-toolkit-2025.1.1.34_offline.exe
905
+ WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel
906
+ ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"
907
+ steps:
908
+ - name: Clone
909
+ id: checkout
910
+ uses: actions/checkout@v4
911
+
912
+ - name: ccache
913
+ uses: hendrikmuhs/ccache-action@v1.2.16
914
+ with:
915
+ key: windows-latest-cmake-sycl
916
+ variant: ccache
917
+ evict-old-files: 1d
918
+
919
+ - name: Install
920
+ run: |
921
+ scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL
922
+
923
+ # TODO: add libcurl support ; we will also need to modify win-build-sycl.bat to accept user-specified args
924
+
925
+ - name: Build
926
+ id: cmake_build
927
+ run: examples/sycl/win-build-sycl.bat
928
+
929
+ windows-latest-cmake-hip:
930
+ if: ${{ github.event.inputs.create_release != 'true' }}
931
+ runs-on: windows-latest
932
+
933
+ steps:
934
+ - name: Clone
935
+ id: checkout
936
+ uses: actions/checkout@v4
937
+
938
+ - name: Clone rocWMMA repository
939
+ id: clone_rocwmma
940
+ run: |
941
+ git clone https://github.com/rocm/rocwmma --branch rocm-6.2.4 --depth 1
942
+
943
+ - name: Install
944
+ id: depends
945
+ run: |
946
+ $ErrorActionPreference = "Stop"
947
+ write-host "Downloading AMD HIP SDK Installer"
948
+ Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
949
+ write-host "Installing AMD HIP SDK"
950
+ Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
951
+ write-host "Completed AMD HIP SDK installation"
952
+
953
+ - name: Verify ROCm
954
+ id: verify
955
+ run: |
956
+ & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
957
+
958
+ - name: Install ccache
959
+ uses: hendrikmuhs/ccache-action@v1.2.16
960
+ with:
961
+ key: ${{ github.job }}
962
+ evict-old-files: 1d
963
+
964
+ - name: libCURL
965
+ id: get_libcurl
966
+ uses: ./.github/actions/windows-setup-curl
967
+
968
+ - name: Build
969
+ id: cmake_build
970
+ env:
971
+ CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
972
+ run: |
973
+ $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
974
+ $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
975
+ cmake -G "Unix Makefiles" -B build -S . `
976
+ -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
977
+ -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
978
+ -DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/rocwmma/library/include/" `
979
+ -DCMAKE_BUILD_TYPE=Release `
980
+ -DGGML_HIP=ON `
981
+ -DGGML_HIP_ROCWMMA_FATTN=ON `
982
+ -DGGML_RPC=ON `
983
+ -DCURL_LIBRARY="$env:CURL_PATH/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:CURL_PATH/include"
984
+ cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
985
+
986
+ ios-xcode-build:
987
+ runs-on: macos-latest
988
+
989
+ steps:
990
+ - name: Checkout code
991
+ uses: actions/checkout@v4
992
+
993
+ - name: Build
994
+ id: cmake_build
995
+ run: |
996
+ sysctl -a
997
+ cmake -B build -G Xcode \
998
+ -DGGML_METAL_USE_BF16=ON \
999
+ -DGGML_METAL_EMBED_LIBRARY=ON \
1000
+ -DLLAMA_CURL=OFF \
1001
+ -DLLAMA_BUILD_EXAMPLES=OFF \
1002
+ -DLLAMA_BUILD_TOOLS=OFF \
1003
+ -DLLAMA_BUILD_TESTS=OFF \
1004
+ -DLLAMA_BUILD_SERVER=OFF \
1005
+ -DCMAKE_SYSTEM_NAME=iOS \
1006
+ -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
1007
+ -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
1008
+ cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
1009
+
1010
+ - name: xcodebuild for swift package
1011
+ id: xcodebuild
1012
+ run: |
1013
+ ./build-xcframework.sh
1014
+
1015
+ - name: Build Xcode project
1016
+ run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build
1017
+
1018
+ android-build:
1019
+ runs-on: ubuntu-latest
1020
+
1021
+ steps:
1022
+ - name: Clone
1023
+ uses: actions/checkout@v4
1024
+
1025
+ - name: ccache
1026
+ uses: hendrikmuhs/ccache-action@v1.2.16
1027
+ with:
1028
+ key: android-build
1029
+ evict-old-files: 1d
1030
+
1031
+ - name: Set up JDK
1032
+ uses: actions/setup-java@v3
1033
+ with:
1034
+ java-version: 17
1035
+ distribution: zulu
1036
+
1037
+ - name: Setup Android SDK
1038
+ uses: android-actions/setup-android@v3
1039
+ with:
1040
+ log-accepted-android-sdk-licenses: false
1041
+
1042
+ - name: Build
1043
+ run: |
1044
+ cd examples/llama.android
1045
+ ./gradlew build --no-daemon
1046
+
1047
+ openEuler-latest-cmake-cann:
1048
+ if: ${{ github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'Ascend NPU') }}
1049
+ defaults:
1050
+ run:
1051
+ shell: bash -el {0}
1052
+ strategy:
1053
+ matrix:
1054
+ arch: [x86, aarch64]
1055
+ cann:
1056
+ - '8.1.RC1.alpha001-910b-openeuler22.03-py3.10'
1057
+ device:
1058
+ - 'ascend910b3'
1059
+ build:
1060
+ - 'Release'
1061
+ runs-on: ${{ matrix.arch == 'aarch64' && 'ubuntu-24.04-arm' || 'ubuntu-24.04' }}
1062
+ container: ascendai/cann:${{ matrix.cann }}
1063
+ steps:
1064
+ - name: Checkout
1065
+ uses: actions/checkout@v4
1066
+
1067
+ - name: Dependencies
1068
+ run: |
1069
+ yum update -y
1070
+ yum install -y git gcc gcc-c++ make cmake libcurl-devel
1071
+
1072
+ - name: Build
1073
+ run: |
1074
+ export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH}
1075
+
1076
+ cmake -S . -B build \
1077
+ -DCMAKE_BUILD_TYPE=${{ matrix.build }} \
1078
+ -DGGML_CANN=on \
1079
+ -DSOC_TYPE=${{ matrix.device }}
1080
+ cmake --build build -j $(nproc)
.github/workflows/close-issue.yml ADDED
@@ -0,0 +1,28 @@
1
+ name: Close inactive issues
2
+ on:
3
+ schedule:
4
+ - cron: "42 0 * * *"
5
+
6
+	# Fine-grained permissions
7
+ # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
8
+ permissions:
9
+ issues: write
10
+
11
+ jobs:
12
+ close-issues:
13
+ runs-on: ubuntu-latest
14
+ permissions:
15
+ issues: write
16
+ pull-requests: write
17
+ steps:
18
+ - uses: actions/stale@v5
19
+ with:
20
+ exempt-issue-labels: "refactor,help wanted,good first issue,research,bug,roadmap"
21
+ days-before-issue-stale: 30
22
+ days-before-issue-close: 14
23
+ stale-issue-label: "stale"
24
+ close-issue-message: "This issue was closed because it has been inactive for 14 days since being marked as stale."
25
+ days-before-pr-stale: -1
26
+ days-before-pr-close: -1
27
+ operations-per-run: 10000
28
+ repo-token: ${{ secrets.GITHUB_TOKEN }}
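Read together, these settings give an untouched issue a 44-day lifetime; a rough timeline (day numbers relative to the last activity):

    # day 0   last activity on the issue
    # day 30  labeled "stale"          (days-before-issue-stale: 30)
    # day 44  closed with the message  (days-before-issue-close: 14 more days)
    # issues carrying an exempt label are skipped, and pull requests are never
    # staled or closed (both PR thresholds are -1)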
.github/workflows/docker.yml ADDED
@@ -0,0 +1,178 @@
1
+ # This workflow uses actions that are not certified by GitHub.
2
+ # They are provided by a third-party and are governed by
3
+ # separate terms of service, privacy policy, and support
4
+ # documentation.
5
+
6
+ # GitHub recommends pinning actions to a commit SHA.
7
+ # To get a newer version, you will need to update the SHA.
8
+ # You can also reference a tag or branch, but the action may change without warning.
9
+
10
+ name: Publish Docker image
11
+
12
+ on:
13
+ workflow_dispatch: # allows manual triggering
14
+ schedule:
15
+ # Rebuild daily rather than on every push because it is expensive
16
+ - cron: '12 4 * * *'
17
+
18
+ concurrency:
19
+ group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
20
+ cancel-in-progress: true
21
+
22
+	# Fine-grained permissions
23
+ # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
24
+ permissions:
25
+ packages: write
26
+
27
+ jobs:
28
+ push_to_registry:
29
+	    name: Push Docker image to GitHub Container Registry
30
+
31
+ runs-on: ubuntu-22.04
32
+ env:
33
+ COMMIT_SHA: ${{ github.sha }}
34
+ strategy:
35
+ fail-fast: false
36
+ matrix:
37
+ config:
38
+ # Multi-stage build
39
+ # Note: the arm64 images are failing, which prevents the amd64 images from being built
40
+ # https://github.com/ggml-org/llama.cpp/issues/11888
41
+ #- { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, free_disk_space: false }
42
+ - { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false }
43
+ - { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false }
44
+ - { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true }
45
+ - { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true }
46
+ - { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false }
47
+ # Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
48
+ #- {tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, free_disk_space: true }
49
+ steps:
50
+ - name: Check out the repo
51
+ uses: actions/checkout@v4
52
+ with:
53
+ fetch-depth: 0 # preserve git history, so we can determine the build number
54
+
55
+ - name: Set up QEMU
56
+ uses: docker/setup-qemu-action@v3
57
+ with:
58
+ image: tonistiigi/binfmt:qemu-v7.0.0-28
59
+
60
+ - name: Set up Docker Buildx
61
+ uses: docker/setup-buildx-action@v3
62
+
63
+	      - name: Log in to GitHub Container Registry
64
+ uses: docker/login-action@v2
65
+ with:
66
+ registry: ghcr.io
67
+ username: ${{ github.repository_owner }}
68
+ password: ${{ secrets.GITHUB_TOKEN }}
69
+
70
+ - name: Determine tag name
71
+ id: tag
72
+ shell: bash
73
+ run: |
74
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
75
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
76
+ REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}" # to lower case
77
+ REPO_NAME="${{ github.event.repository.name }}"
78
+
79
+ # determine tag name postfix (build number, commit hash)
80
+ if [[ "${{ env.GITHUB_BRANCH_NAME }}" == "master" ]]; then
81
+ TAG_POSTFIX="-b${BUILD_NUMBER}"
82
+ else
83
+ SAFE_NAME=$(echo "${{ env.GITHUB_BRANCH_NAME }}" | tr '/' '-')
84
+ TAG_POSTFIX="-${SAFE_NAME}-${SHORT_HASH}"
85
+ fi
86
+ # list all tags possible
87
+ if [[ "${{ matrix.config.tag }}" == "cpu" ]]; then
88
+ TYPE=""
89
+ else
90
+ TYPE="-${{ matrix.config.tag }}"
91
+ fi
92
+ PREFIX="ghcr.io/${REPO_OWNER}/${REPO_NAME}:"
93
+ FULLTAGS="${PREFIX}full${TYPE},${PREFIX}full${TYPE}${TAG_POSTFIX}"
94
+ LIGHTTAGS="${PREFIX}light${TYPE},${PREFIX}light${TYPE}${TAG_POSTFIX}"
95
+ SERVERTAGS="${PREFIX}server${TYPE},${PREFIX}server${TYPE}${TAG_POSTFIX}"
96
+ echo "full_output_tags=$FULLTAGS" >> $GITHUB_OUTPUT
97
+ echo "light_output_tags=$LIGHTTAGS" >> $GITHUB_OUTPUT
98
+ echo "server_output_tags=$SERVERTAGS" >> $GITHUB_OUTPUT
99
+ echo "full_output_tags=$FULLTAGS" # print out for debugging
100
+ echo "light_output_tags=$LIGHTTAGS" # print out for debugging
101
+ echo "server_output_tags=$SERVERTAGS" # print out for debugging
102
+ env:
103
+ GITHUB_BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
104
+ GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'
105
+
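To make the tag logic concrete, here is how the step above expands in two assumed scenarios (owner ggml-org, repo llama.cpp; build number and hash hypothetical):

    # push to master, BUILD_NUMBER=5200, matrix tag "cuda" (TYPE="-cuda"):
    #   full_output_tags = ghcr.io/ggml-org/llama.cpp:full-cuda,ghcr.io/ggml-org/llama.cpp:full-cuda-b5200
    # PR branch "fix/rpc" at short hash a1b2c3d, matrix tag "cpu" (TYPE=""):
    #   full_output_tags = ghcr.io/ggml-org/llama.cpp:full,ghcr.io/ggml-org/llama.cpp:full-fix-rpc-a1b2c3d
    # light_/server_ tags follow the same pattern with "light"/"server" in place of "full"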
106
+ - name: Free Disk Space (Ubuntu)
107
+ if: ${{ matrix.config.free_disk_space == true }}
108
+ uses: ggml-org/free-disk-space@v1.3.1
109
+ with:
110
+	          # setting this to "true" frees about 6 GB more,
111
+	          # but might remove tools that are actually needed
112
+ tool-cache: false
113
+
114
+ # all of these default to true, but feel free to set to
115
+ # "false" if necessary for your workflow
116
+ android: true
117
+ dotnet: true
118
+ haskell: true
119
+ large-packages: true
120
+ docker-images: true
121
+ swap-storage: true
122
+
123
+ - name: Build and push Full Docker image (tagged + versioned)
124
+ if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.full == true }}
125
+ uses: docker/build-push-action@v6
126
+ with:
127
+ context: .
128
+ push: true
129
+ platforms: ${{ matrix.config.platforms }}
130
+ # tag list is generated from step above
131
+ tags: ${{ steps.tag.outputs.full_output_tags }}
132
+ file: ${{ matrix.config.dockerfile }}
133
+ target: full
134
+ provenance: false
135
+ # using github experimental cache
136
+ cache-from: type=gha
137
+ cache-to: type=gha,mode=max
138
+ # return to this if the experimental github cache is having issues
139
+ #cache-to: type=local,dest=/tmp/.buildx-cache
140
+ #cache-from: type=local,src=/tmp/.buildx-cache
141
+
142
+ - name: Build and push Light Docker image (tagged + versioned)
143
+ if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.light == true }}
144
+ uses: docker/build-push-action@v6
145
+ with:
146
+ context: .
147
+ push: true
148
+ platforms: ${{ matrix.config.platforms }}
149
+ # tag list is generated from step above
150
+ tags: ${{ steps.tag.outputs.light_output_tags }}
151
+ file: ${{ matrix.config.dockerfile }}
152
+ target: light
153
+ provenance: false
154
+ # using github experimental cache
155
+ cache-from: type=gha
156
+ cache-to: type=gha,mode=max
157
+ # return to this if the experimental github cache is having issues
158
+ #cache-to: type=local,dest=/tmp/.buildx-cache
159
+ #cache-from: type=local,src=/tmp/.buildx-cache
160
+
161
+ - name: Build and push Server Docker image (tagged + versioned)
162
+ if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.server == true }}
163
+ uses: docker/build-push-action@v6
164
+ with:
165
+ context: .
166
+ push: true
167
+ platforms: ${{ matrix.config.platforms }}
168
+ # tag list is generated from step above
169
+ tags: ${{ steps.tag.outputs.server_output_tags }}
170
+ file: ${{ matrix.config.dockerfile }}
171
+ target: server
172
+ provenance: false
173
+ # using github experimental cache
174
+ cache-from: type=gha
175
+ cache-to: type=gha,mode=max
176
+ # return to this if the experimental github cache is having issues
177
+ #cache-to: type=local,dest=/tmp/.buildx-cache
178
+ #cache-from: type=local,src=/tmp/.buildx-cache
.github/workflows/editorconfig.yml ADDED
@@ -0,0 +1,29 @@
1
+ name: EditorConfig Checker
2
+
3
+ on:
4
+ workflow_dispatch: # allows manual triggering
5
+ inputs:
6
+ create_release:
7
+ description: 'Create new release'
8
+ required: true
9
+ type: boolean
10
+ push:
11
+ branches:
12
+ - master
13
+ pull_request:
14
+ branches:
15
+ - master
16
+
17
+ concurrency:
18
+ group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
19
+ cancel-in-progress: true
20
+
21
+ jobs:
22
+ editorconfig:
23
+ runs-on: ubuntu-latest
24
+ steps:
25
+ - uses: actions/checkout@v4
26
+ - uses: editorconfig-checker/action-editorconfig-checker@v2
27
+ with:
28
+ version: v3.0.3
29
+ - run: editorconfig-checker
.github/workflows/gguf-publish.yml ADDED
@@ -0,0 +1,44 @@
1
+	# This workflow will upload a Python Package via the PyPA publish action when a GGUF release is created
2
+ # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
3
+
4
+ # See `gguf-py/README.md` for how to make a release.
5
+
6
+ # This workflow uses actions that are not certified by GitHub.
7
+ # They are provided by a third-party and are governed by
8
+ # separate terms of service, privacy policy, and support
9
+ # documentation.
10
+
11
+ name: Upload Python Package
12
+
13
+ on:
14
+ workflow_dispatch:
15
+ push:
16
+ # Pattern matched against refs/tags
17
+ tags:
18
+ - 'gguf-v*' # Push events to every version tag
19
+
20
+
21
+ jobs:
22
+ deploy:
23
+
24
+ runs-on: ubuntu-latest
25
+
26
+ steps:
27
+ - uses: actions/checkout@v4
28
+ - name: Set up Python
29
+ uses: actions/setup-python@v5
30
+ with:
31
+ python-version: '3.9.x'
32
+ - name: Install dependencies
33
+ run: |
34
+ cd gguf-py
35
+ python -m pip install poetry
36
+ poetry install
37
+
38
+ - name: Build package
39
+ run: cd gguf-py && poetry build
40
+ - name: Publish package
41
+ uses: pypa/gh-action-pypi-publish@release/v1
42
+ with:
43
+ password: ${{ secrets.PYPI_API_TOKEN }}
44
+ packages-dir: gguf-py/dist
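A release of the gguf-py package is driven entirely by the tag push; for example (version number hypothetical):

    git tag gguf-v0.17.0
    git push origin gguf-v0.17.0   # matches 'gguf-v*' above and runs the deploy job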
.github/workflows/labeler.yml ADDED
@@ -0,0 +1,17 @@
1
+ name: "Pull Request Labeler"
2
+ on:
3
+ - pull_request_target
4
+
5
+ jobs:
6
+ labeler:
7
+ permissions:
8
+ contents: read
9
+ pull-requests: write
10
+ runs-on: ubuntu-latest
11
+ steps:
12
+ - uses: actions/checkout@v4
13
+ with:
14
+ repository: "ggml-org/llama.cpp"
15
+ - uses: actions/labeler@v5
16
+ with:
17
+ configuration-path: '.github/labeler.yml'
.github/workflows/python-check-requirements.yml ADDED
@@ -0,0 +1,33 @@
1
+ name: Python check requirements.txt
2
+
3
+ on:
4
+ push:
5
+ paths:
6
+ - '.github/workflows/python-check-requirements.yml'
7
+ - 'scripts/check-requirements.sh'
8
+ - 'convert*.py'
9
+ - '**/requirements*.txt'
10
+ pull_request:
11
+ paths:
12
+ - '.github/workflows/python-check-requirements.yml'
13
+ - 'scripts/check-requirements.sh'
14
+ - 'convert*.py'
15
+ - '**/requirements*.txt'
16
+
17
+ concurrency:
18
+ group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
19
+ cancel-in-progress: true
20
+
21
+ jobs:
22
+ python-check-requirements:
23
+ runs-on: ubuntu-latest
24
+ name: check-requirements
25
+ steps:
26
+ - name: Check out source repository
27
+ uses: actions/checkout@v4
28
+ - name: Set up Python environment
29
+ uses: actions/setup-python@v5
30
+ with:
31
+ python-version: "3.11"
32
+ - name: Run check-requirements.sh script
33
+ run: bash scripts/check-requirements.sh
.github/workflows/python-lint.yml ADDED
@@ -0,0 +1,30 @@
1
+ name: flake8 Lint
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - master
7
+ paths: ['.github/workflows/python-lint.yml', '**/*.py']
8
+ pull_request:
9
+ types: [opened, synchronize, reopened]
10
+ paths: ['.github/workflows/python-lint.yml', '**/*.py']
11
+
12
+ concurrency:
13
+ group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
14
+ cancel-in-progress: true
15
+
16
+ jobs:
17
+ flake8-lint:
18
+ runs-on: ubuntu-latest
19
+ name: Lint
20
+ steps:
21
+ - name: Check out source repository
22
+ uses: actions/checkout@v4
23
+ - name: Set up Python environment
24
+ uses: actions/setup-python@v5
25
+ with:
26
+ python-version: "3.11"
27
+ - name: flake8 Lint
28
+ uses: py-actions/flake8@v2
29
+ with:
30
+ plugins: "flake8-no-print"