llamacpp
This view is limited to 50 files because the commit contains too many changes; see the raw diff for the full change set.
 - .gitattributes +23 -0
 - llama.cpp/.clang-format +161 -0
 - llama.cpp/.clang-tidy +26 -0
 - llama.cpp/.devops/cloud-v-pipeline +22 -0
 - llama.cpp/.devops/full-cuda.Dockerfile +33 -0
 - llama.cpp/.devops/full-musa.Dockerfile +33 -0
 - llama.cpp/.devops/full-rocm.Dockerfile +50 -0
 - llama.cpp/.devops/full.Dockerfile +25 -0
 - llama.cpp/.devops/llama-cli-cann.Dockerfile +44 -0
 - llama.cpp/.devops/llama-cli-cuda.Dockerfile +38 -0
 - llama.cpp/.devops/llama-cli-intel.Dockerfile +28 -0
 - llama.cpp/.devops/llama-cli-musa.Dockerfile +38 -0
 - llama.cpp/.devops/llama-cli-rocm.Dockerfile +45 -0
 - llama.cpp/.devops/llama-cli-vulkan.Dockerfile +27 -0
 - llama.cpp/.devops/llama-cli.Dockerfile +23 -0
 - llama.cpp/.devops/llama-cpp-cuda.srpm.spec +83 -0
 - llama.cpp/.devops/llama-cpp.srpm.spec +85 -0
 - llama.cpp/.devops/llama-server-cuda.Dockerfile +43 -0
 - llama.cpp/.devops/llama-server-intel.Dockerfile +34 -0
 - llama.cpp/.devops/llama-server-musa.Dockerfile +43 -0
 - llama.cpp/.devops/llama-server-rocm.Dockerfile +54 -0
 - llama.cpp/.devops/llama-server-vulkan.Dockerfile +31 -0
 - llama.cpp/.devops/llama-server.Dockerfile +41 -0
 - llama.cpp/.devops/nix/apps.nix +21 -0
 - llama.cpp/.devops/nix/devshells.nix +52 -0
 - llama.cpp/.devops/nix/docker.nix +37 -0
 - llama.cpp/.devops/nix/jetson-support.nix +39 -0
 - llama.cpp/.devops/nix/nixpkgs-instances.nix +45 -0
 - llama.cpp/.devops/nix/package-gguf-py.nix +36 -0
 - llama.cpp/.devops/nix/package.nix +246 -0
 - llama.cpp/.devops/nix/python-scripts.nix +66 -0
 - llama.cpp/.devops/nix/scope.nix +41 -0
 - llama.cpp/.devops/nix/sif.nix +27 -0
 - llama.cpp/.devops/tools.sh +41 -0
 - llama.cpp/.dockerignore +20 -0
 - llama.cpp/.ecrc +6 -0
 - llama.cpp/.editorconfig +42 -0
 - llama.cpp/.flake8 +17 -0
 - llama.cpp/.github/ISSUE_TEMPLATE/010-bug-compilation.yml +77 -0
 - llama.cpp/.github/ISSUE_TEMPLATE/011-bug-results.yml +101 -0
 - llama.cpp/.github/ISSUE_TEMPLATE/019-bug-misc.yml +81 -0
 - llama.cpp/.github/ISSUE_TEMPLATE/020-enhancement.yml +51 -0
 - llama.cpp/.github/ISSUE_TEMPLATE/030-research.yml +52 -0
 - llama.cpp/.github/ISSUE_TEMPLATE/040-refactor.yml +28 -0
 - llama.cpp/.github/ISSUE_TEMPLATE/config.yml +11 -0
 - llama.cpp/.github/labeler.yml +86 -0
 - llama.cpp/.github/pull_request_template.md +1 -0
 - llama.cpp/.github/workflows/bench.yml.disabled +315 -0
 - llama.cpp/.github/workflows/build.yml +1416 -0
 - llama.cpp/.github/workflows/close-issue.yml +28 -0
 
    	
.gitattributes CHANGED
@@ -35,3 +35,26 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 llama_lora_model_1/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 outputs/checkpoint-100/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-export-lora filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-imatrix filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-llava-cli filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-minicpmv-cli filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-perplexity filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-server filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/common/libcommon.a filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/examples/server/CMakeFiles/llama-server.dir/server.cpp.o filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/src/CMakeFiles/llama.dir/llama.cpp.o filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/src/libllama.so filter=lfs diff=lfs merge=lfs -text
+llama.cpp/models/ggml-vocab-aquila.gguf filter=lfs diff=lfs merge=lfs -text
+llama.cpp/models/ggml-vocab-baichuan.gguf filter=lfs diff=lfs merge=lfs -text
+llama.cpp/models/ggml-vocab-command-r.gguf filter=lfs diff=lfs merge=lfs -text
+llama.cpp/models/ggml-vocab-deepseek-coder.gguf filter=lfs diff=lfs merge=lfs -text
+llama.cpp/models/ggml-vocab-deepseek-llm.gguf filter=lfs diff=lfs merge=lfs -text
+llama.cpp/models/ggml-vocab-falcon.gguf filter=lfs diff=lfs merge=lfs -text
+llama.cpp/models/ggml-vocab-gpt-2.gguf filter=lfs diff=lfs merge=lfs -text
+llama.cpp/models/ggml-vocab-gpt-neox.gguf filter=lfs diff=lfs merge=lfs -text
+llama.cpp/models/ggml-vocab-llama-bpe.gguf filter=lfs diff=lfs merge=lfs -text
+llama.cpp/models/ggml-vocab-mpt.gguf filter=lfs diff=lfs merge=lfs -text
+llama.cpp/models/ggml-vocab-qwen2.gguf filter=lfs diff=lfs merge=lfs -text
+llama.cpp/models/ggml-vocab-refact.gguf filter=lfs diff=lfs merge=lfs -text
+llama.cpp/models/ggml-vocab-starcoder.gguf filter=lfs diff=lfs merge=lfs -text
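The new entries route the committed llama.cpp build artifacts (binaries, object files, libllama.so) and the ggml vocab models through Git LFS instead of storing them as regular blobs. Entries of this form are normally generated with `git lfs track` rather than written by hand; a minimal sketch, assuming git-lfs is installed, with patterns taken from the diff above:

    # Each `git lfs track` call appends a matching filter line to .gitattributes.
    git lfs install
    git lfs track "llama.cpp/build/bin/llama-server"
    git lfs track "llama.cpp/models/ggml-vocab-*.gguf"
    git add .gitattributes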
    	
llama.cpp/.clang-format ADDED
@@ -0,0 +1,161 @@
+---
+Language:        Cpp
+AlignAfterOpenBracket: Align
+AlignArrayOfStructures: Left
+AlignConsecutiveAssignments: AcrossComments
+AlignConsecutiveBitFields: AcrossComments
+AlignConsecutiveDeclarations: AcrossComments
+AlignConsecutiveMacros: AcrossComments
+# AlignConsecutiveShortCaseStatements: AcrossComments
+AlignEscapedNewlines: Left # LeftWithLastLine
+AlignOperands:   Align
+AlignTrailingComments:
+  Kind: Always
+  OverEmptyLines: 1
+AllowAllArgumentsOnNextLine: true
+AllowAllParametersOfDeclarationOnNextLine: false
+# AllowBreakBeforeNoexceptSpecifier: OnlyWithParen
+AllowShortBlocksOnASingleLine: Never
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: Inline
+AllowShortIfStatementsOnASingleLine: Never
+AllowShortLambdasOnASingleLine: Inline
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakBeforeMultilineStrings: true
+BinPackArguments: true
+BinPackParameters: true # OnePerLine
+BitFieldColonSpacing: Both
+BreakBeforeBraces: Custom # Attach
+BraceWrapping:
+  AfterCaseLabel:  true
+  AfterClass:      false
+  AfterControlStatement: false
+  AfterEnum:       false
+  AfterFunction:   false
+  AfterNamespace:  false
+  AfterObjCDeclaration: false
+  AfterStruct:     false
+  AfterUnion:      false
+  AfterExternBlock: false
+  BeforeCatch:     false
+  BeforeElse:      false
+  BeforeLambdaBody: false
+  BeforeWhile: false
+  IndentBraces:    false
+  SplitEmptyFunction: false
+  SplitEmptyRecord: false
+  SplitEmptyNamespace: false
+# BreakAdjacentStringLiterals: true
+BreakAfterAttributes: Never
+BreakBeforeBinaryOperators: None
+BreakBeforeInlineASMColon: OnlyMultiline
+BreakBeforeTernaryOperators: false
+# BreakBinaryOperations: Never
+BreakConstructorInitializers: AfterColon
+# BreakFunctionDefinitionParameters: false
+BreakInheritanceList: AfterComma
+BreakStringLiterals: true
+# BreakTemplateDeclarations: Yes
+ColumnLimit:     120
+CommentPragmas:  '^ IWYU pragma:'
+CompactNamespaces: false
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: false
+DerivePointerAlignment: false
+DisableFormat:   false
+EmptyLineBeforeAccessModifier: Leave
+EmptyLineAfterAccessModifier: Never
+ExperimentalAutoDetectBinPacking: false
+FixNamespaceComments: true
+IncludeBlocks:   Regroup
+IncludeCategories:
+  - Regex:           '^<.*\.h>'
+    Priority:        1
+    SortPriority:    0
+  - Regex:           '^<.*'
+    Priority:        2
+    SortPriority:    0
+  - Regex:           '.*'
+    Priority:        3
+    SortPriority:    0
+IncludeIsMainRegex: '([-_](test|unittest))?$'
+IncludeIsMainSourceRegex: ''
+IndentAccessModifiers: false
+IndentCaseBlocks: true
+IndentCaseLabels: true
+IndentExternBlock: NoIndent
+IndentGotoLabels: false
+IndentPPDirectives: AfterHash
+IndentWidth:     4
+IndentWrappedFunctionNames: false
+InsertBraces:    true # NOTE: may lead to incorrect formatting
+InsertNewlineAtEOF: true
+JavaScriptQuotes: Leave
+JavaScriptWrapImports: true
+KeepEmptyLinesAtTheStartOfBlocks: false
+LambdaBodyIndentation: Signature
+LineEnding: LF
+MacroBlockBegin: ''
+MacroBlockEnd:   ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBinPackProtocolList: Auto
+ObjCBlockIndentWidth: 4
+ObjCSpaceAfterProperty: true
+ObjCSpaceBeforeProtocolList: true
+PPIndentWidth: -1
+PackConstructorInitializers: CurrentLine
+PenaltyBreakAssignment: 2
+PenaltyBreakBeforeFirstCallParameter: 1
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakString: 1000
+PenaltyBreakTemplateDeclaration: 10
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 200
+PointerAlignment: Middle
+QualifierAlignment: Left
+#QualifierOrder: ['static', 'inline', 'friend', 'constexpr', 'const', 'volatile', 'type', 'restrict']
+RawStringFormats:
+  - Language:        Cpp
+    Delimiters:
+      - cc
+      - CC
+      - cpp
+      - Cpp
+      - CPP
+      - 'c++'
+      - 'C++'
+    CanonicalDelimiter: ''
+ReferenceAlignment: Middle
+ReflowComments:  false # IndentOnly
+SeparateDefinitionBlocks: Always
+SortIncludes:    CaseInsensitive
+SortUsingDeclarations: LexicographicNumeric
+SpaceAfterCStyleCast: true
+SpaceAfterLogicalNot: false
+SpaceAfterTemplateKeyword: true
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeCpp11BracedList: false
+SpaceBeforeCtorInitializerColon: true
+SpaceBeforeInheritanceColon: true
+SpaceBeforeParens: ControlStatements
+SpaceBeforeRangeBasedForLoopColon: true
+SpaceInEmptyBlock: false
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 2
+SpacesInAngles:  Never
+SpacesInContainerLiterals: true
+SpacesInLineCommentPrefix:
+  Minimum: 1
+  Maximum: -1
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+SpaceBeforeSquareBrackets: false
+Standard:        c++17
+TabWidth:        4
+UseTab:          Never
+WhitespaceSensitiveMacros: ['STRINGIZE']
+...
+
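clang-format picks up the nearest .clang-format file above each source file, so everything under llama.cpp/ inherits this style automatically. A minimal usage sketch (the file path is illustrative):

    # Reformat a file in place using the style file above.
    clang-format -i src/llama.cpp
    # Verify formatting without rewriting; exits non-zero on violations.
    clang-format --dry-run --Werror src/llama.cpp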
    	
llama.cpp/.clang-tidy ADDED
@@ -0,0 +1,26 @@
+---
+Checks: >
+    bugprone-*,
+    -bugprone-easily-swappable-parameters,
+    -bugprone-implicit-widening-of-multiplication-result,
+    -bugprone-misplaced-widening-cast,
+    -bugprone-narrowing-conversions,
+    readability-*,
+    -readability-avoid-unconditional-preprocessor-if,
+    -readability-function-cognitive-complexity,
+    -readability-identifier-length,
+    -readability-implicit-bool-conversion,
+    -readability-magic-numbers,
+    -readability-uppercase-literal-suffix,
+    -readability-simplify-boolean-expr,
+    clang-analyzer-*,
+    -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
+    performance-*,
+    portability-*,
+    -portability-simd-intrinsics,
+    misc-*,
+    -misc-const-correctness,
+    -misc-non-private-member-variables-in-classes,
+    -misc-no-recursion,
+    -misc-use-anonymous-namespace,
+FormatStyle: none
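clang-tidy resolves its check list the same way, from the nearest .clang-tidy, but it also needs the compile flags of each translation unit, usually via a compilation database. A sketch assuming a CMake build directory named build:

    # Emit compile_commands.json alongside the build files.
    cmake -B build -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
    # Run the check set above on one file, pulling flags from the database.
    clang-tidy -p build src/llama.cpp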
    	
llama.cpp/.devops/cloud-v-pipeline ADDED
@@ -0,0 +1,22 @@
+node('x86_runner1'){            // Running on x86 runner containing latest vector qemu, latest vector gcc and all the necessary libraries
+    stage('Cleanup'){
+        cleanWs()               // Cleaning previous CI build in workspace
+    }
+    stage('checkout repo'){
+        retry(5){               // Retry if the cloning fails due to some reason
+            checkout scm        // Clone the repo on Runner
+        }
+    }
+    stage('Compiling llama.cpp'){
+        sh'''#!/bin/bash
+            make RISCV=1 RISCV_CROSS_COMPILE=1 # Compiling llama for RISC-V
+        '''
+    }
+    stage('Running llama.cpp'){
+        sh'''#!/bin/bash
+            module load gnu-bin2/0.1            # loading latest versions of vector qemu and vector gcc
+            qemu-riscv64 -L /softwares/gnu-bin2/sysroot  -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 ./llama-cli -m /home/alitariq/codellama-7b.Q4_K_M.gguf -p "Anything" -n 9 > llama_log.txt            # Running llama.cpp on vector qemu-riscv64
+            cat llama_log.txt                   # Printing results
+        '''
+    }
+}
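This Jenkins pipeline cross-compiles llama.cpp for RISC-V on an x86 runner, then executes the binary under qemu-riscv64 with the vector extension enabled (vlen=256). Stripped of the Jenkins and environment-module plumbing, the run stage reduces to roughly the following (the sysroot and model paths are machine-specific placeholders):

    # Emulate a 256-bit-vector RISC-V CPU and run the cross-compiled CLI.
    qemu-riscv64 -L /path/to/riscv/sysroot \
        -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 \
        ./llama-cli -m model.gguf -p "Anything" -n 9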
    	
llama.cpp/.devops/full-cuda.Dockerfile ADDED
@@ -0,0 +1,33 @@
+ARG UBUNTU_VERSION=22.04
+# This needs to generally match the container host's environment.
+ARG CUDA_VERSION=12.6.0
+# Target the CUDA build image
+ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+
+FROM ${BASE_CUDA_DEV_CONTAINER} AS build
+
+# CUDA architecture to build for (defaults to all supported archs)
+ARG CUDA_DOCKER_ARCH=default
+
+RUN apt-get update && \
+    apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1
+
+COPY requirements.txt   requirements.txt
+COPY requirements       requirements
+
+RUN pip install --upgrade pip setuptools wheel \
+    && pip install -r requirements.txt
+
+WORKDIR /app
+
+COPY . .
+
+# Use the default CUDA archs if not specified
+RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
+        export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
+    fi && \
+    cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+    cmake --build build --config Release -j$(nproc) && \
+    cp build/bin/* .
+
+ENTRYPOINT ["/app/.devops/tools.sh"]
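This image builds every binary with CUDA enabled and keeps the full toolchain, hence "full". A build-and-run sketch (the image tag and model path are placeholders; GPU access at runtime assumes the NVIDIA Container Toolkit on the host):

    # Pin a single compute capability (e.g. 8.6) instead of the default fat build.
    docker build -t local/llama.cpp:full-cuda \
        --build-arg CUDA_DOCKER_ARCH=86 \
        -f .devops/full-cuda.Dockerfile .
    docker run --gpus all -v /path/to/models:/models local/llama.cpp:full-cuda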
    	
llama.cpp/.devops/full-musa.Dockerfile ADDED
@@ -0,0 +1,33 @@
+ARG UBUNTU_VERSION=22.04
+# This needs to generally match the container host's environment.
+ARG MUSA_VERSION=rc3.1.0
+# Target the MUSA build image
+ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+
+FROM ${BASE_MUSA_DEV_CONTAINER} AS build
+
+# MUSA architecture to build for (defaults to all supported archs)
+ARG MUSA_DOCKER_ARCH=default
+
+RUN apt-get update && \
+    apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1
+
+COPY requirements.txt   requirements.txt
+COPY requirements       requirements
+
+RUN pip install --upgrade pip setuptools wheel \
+    && pip install -r requirements.txt
+
+WORKDIR /app
+
+COPY . .
+
+# Use the default MUSA archs if not specified
+RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
+        export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
+    fi && \
+    cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+    cmake --build build --config Release -j$(nproc) && \
+    cp build/bin/* .
+
+ENTRYPOINT ["/app/.devops/tools.sh"]
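The MUSA variant mirrors the CUDA one line for line; only the base image (mthreads/musa) and the architecture knob differ, so a build is simply:

    # Optionally add --build-arg MUSA_DOCKER_ARCH=... to pin an architecture.
    docker build -t local/llama.cpp:full-musa -f .devops/full-musa.Dockerfile .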
    	
llama.cpp/.devops/full-rocm.Dockerfile ADDED
@@ -0,0 +1,50 @@
+ARG UBUNTU_VERSION=22.04
+
+# This needs to generally match the container host's environment.
+ARG ROCM_VERSION=5.6
+
+# Target the CUDA build image
+ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
+
+FROM ${BASE_ROCM_DEV_CONTAINER} AS build
+
+# Unless otherwise specified, we make a fat build.
+# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
+# This is mostly tied to rocBLAS supported archs.
+ARG ROCM_DOCKER_ARCH="\
+    gfx803 \
+    gfx900 \
+    gfx906 \
+    gfx908 \
+    gfx90a \
+    gfx1010 \
+    gfx1030 \
+    gfx1100 \
+    gfx1101 \
+    gfx1102"
+
+COPY requirements.txt   requirements.txt
+COPY requirements       requirements
+
+RUN pip install --upgrade pip setuptools wheel \
+    && pip install -r requirements.txt
+
+WORKDIR /app
+
+COPY . .
+
+# Set nvcc architecture
+ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
+# Enable ROCm
+ENV GGML_HIPBLAS=1
+ENV CC=/opt/rocm/llvm/bin/clang
+ENV CXX=/opt/rocm/llvm/bin/clang++
+
+# Enable cURL
+ENV LLAMA_CURL=1
+RUN apt-get update && \
+    apt-get install -y libcurl4-openssl-dev
+
+RUN make -j$(nproc)
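Unlike the CUDA/MUSA images, the ROCm build drives the legacy Makefile through environment variables (GGML_HIPBLAS, AMDGPU_TARGETS) rather than CMake flags. At runtime a container needs the host's GPU device nodes; the sketch below uses the standard ROCm passthrough (device paths are the usual ones; host group permissions such as video/render may also be required):

    docker build -t local/llama.cpp:full-rocm -f .devops/full-rocm.Dockerfile .
    docker run --device /dev/kfd --device /dev/dri \
        -v /path/to/models:/models local/llama.cpp:full-rocm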
    	
llama.cpp/.devops/full.Dockerfile ADDED
@@ -0,0 +1,25 @@
+ARG UBUNTU_VERSION=22.04
+
+FROM ubuntu:$UBUNTU_VERSION AS build
+
+RUN apt-get update && \
+    apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1
+
+COPY requirements.txt   requirements.txt
+COPY requirements       requirements
+
+RUN pip install --upgrade pip setuptools wheel \
+    && pip install -r requirements.txt
+
+WORKDIR /app
+
+COPY . .
+
+ENV LLAMA_CURL=1
+
+
+RUN make -j$(nproc)
+
+ENV LC_ALL=C.utf8
+
+ENTRYPOINT ["/app/.devops/tools.sh"]
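The plain CPU image takes the same Makefile route with no GPU backend. Its entrypoint, like the other full images, is .devops/tools.sh, a dispatcher over the built binaries; that script is added in this commit (see the file list above) but its diff is not reproduced here, so the flag below is an assumption:

    docker build -t local/llama.cpp:full -f .devops/full.Dockerfile .
    # "--run" is an assumed tools.sh mode; -m/-p/-n are llama-cli flags seen
    # elsewhere in this commit (cloud-v-pipeline).
    docker run -v /path/to/models:/models local/llama.cpp:full \
        --run -m /models/model.gguf -p "Hello" -n 64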
    	
llama.cpp/.devops/llama-cli-cann.Dockerfile ADDED
@@ -0,0 +1,44 @@
+ARG ASCEND_VERSION=8.0.rc2.alpha003-910b-openeuler22.03-py3.8
+
+FROM ascendai/cann:$ASCEND_VERSION AS build
+
+WORKDIR /app
+
+COPY . .
+
+RUN yum install -y gcc g++ cmake make
+ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
+ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
+ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
+ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
+ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
+ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
+ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
+ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
+ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}
+
+# find libascend_hal.so, because the drive hasn`t been mounted.
+ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH
+
+RUN echo "Building with static libs" && \
+    source /usr/local/Ascend/ascend-toolkit/set_env.sh --force && \
+    cmake -B build -DGGML_NATIVE=OFF -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF  && \
+    cmake --build build --config Release --target llama-cli
+
+# TODO: use image with NNRT
+FROM ascendai/cann:$ASCEND_VERSION AS runtime
+COPY --from=build /app/build/bin/llama-cli /llama-cli
+
+ENV LC_ALL=C.utf8
+
+ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
+ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
+ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
+ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
+ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
+ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
+ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
+ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
+ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}
+
+ENTRYPOINT ["/llama-cli" ]
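The CANN image links llama-cli statically against the Ascend toolkit; the stub LD_LIBRARY_PATH entry exists because, as the comment notes, the NPU driver is only present once the host mounts it into the container. A run sketch using the device set commonly shown in llama.cpp's CANN documentation (treat the exact device list as an assumption for your driver version):

    docker run --device /dev/davinci0 --device /dev/davinci_manager \
        --device /dev/devmm_svm --device /dev/hisi_hdc \
        -v /usr/local/dcmi:/usr/local/dcmi \
        -v /path/to/models:/models \
        local/llama.cpp:cli-cann -m /models/model.gguf -p "Hello" -n 32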
    	
llama.cpp/.devops/llama-cli-cuda.Dockerfile ADDED
@@ -0,0 +1,38 @@
+ARG UBUNTU_VERSION=22.04
+# This needs to generally match the container host's environment.
+ARG CUDA_VERSION=12.6.0
+# Target the CUDA build image
+ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+# Target the CUDA runtime image
+ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
+
+FROM ${BASE_CUDA_DEV_CONTAINER} AS build
+
+# CUDA architecture to build for (defaults to all supported archs)
+ARG CUDA_DOCKER_ARCH=default
+
+RUN apt-get update && \
+    apt-get install -y build-essential git cmake
+
+WORKDIR /app
+
+COPY . .
+
+# Use the default CUDA archs if not specified
+RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
+        export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
+    fi && \
+    cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+    cmake --build build --config Release --target llama-cli -j$(nproc) && \
+    mkdir -p /app/lib && \
+    find build -name "*.so" -exec cp {} /app/lib \;
+
+FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
+
+RUN apt-get update && \
+    apt-get install -y libgomp1
+
+COPY --from=build /app/lib/ /
+COPY --from=build /app/build/bin/llama-cli /
+
+ENTRYPOINT [ "/llama-cli" ]
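Here the two-stage split pays off: the runtime stage carries only llama-cli, its shared libraries, and libgomp1 on top of the CUDA runtime base, so the shipped image is far smaller than the -devel build stage. Run sketch (the -m/-p/-n flags match the llama-cli invocation in cloud-v-pipeline above; -ngl, the GPU layer-offload count, is a standard llama-cli flag assumed here):

    docker build -t local/llama.cpp:cli-cuda -f .devops/llama-cli-cuda.Dockerfile .
    docker run --gpus all -v /path/to/models:/models local/llama.cpp:cli-cuda \
        -m /models/model.gguf -p "Anything" -n 64 -ngl 99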
    	
        llama.cpp/.devops/llama-cli-intel.Dockerfile
    ADDED
    
@@ -0,0 +1,28 @@
ARG ONEAPI_VERSION=2025.0.0-0-devel-ubuntu22.04

FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build

ARG GGML_SYCL_F16=OFF
RUN apt-get update && \
    apt-get install -y git

WORKDIR /app

COPY . .

RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
        echo "GGML_SYCL_F16 is set" && \
        export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
    fi && \
    echo "Building with static libs" && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx \
    ${OPT_SYCL_F16} -DBUILD_SHARED_LIBS=OFF && \
    cmake --build build --config Release --target llama-cli

FROM intel/oneapi-basekit:$ONEAPI_VERSION AS runtime

COPY --from=build /app/build/bin/llama-cli /llama-cli

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/llama-cli" ]
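A sketch of how this SYCL image might be built and run on an Intel GPU host; the /dev/dri pass-through and the paths are assumptions, not part of this commit:

docker build -f llama.cpp/.devops/llama-cli-intel.Dockerfile \
    --build-arg GGML_SYCL_F16=ON -t llama-cli-intel llama.cpp
docker run --device /dev/dri -v /path/to/models:/models llama-cli-intel \
    -m /models/model.gguf -p "Hello"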
    	
        llama.cpp/.devops/llama-cli-musa.Dockerfile
    ADDED
    
@@ -0,0 +1,38 @@
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG MUSA_VERSION=rc3.1.0
# Target the MUSA build image
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# Target the MUSA runtime image
ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_MUSA_DEV_CONTAINER} AS build

# MUSA architecture to build for (defaults to all supported archs)
ARG MUSA_DOCKER_ARCH=default

RUN apt-get update && \
    apt-get install -y build-essential git cmake

WORKDIR /app

COPY . .

# Use the default MUSA archs if not specified
RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
        export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
    fi && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
    cmake --build build --config Release --target llama-cli -j$(nproc) && \
    mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime

RUN apt-get update && \
    apt-get install -y libgomp1

COPY --from=build /app/lib/ /
COPY --from=build /app/build/bin/llama-cli /llama-cli

ENTRYPOINT [ "/llama-cli" ]
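The MUSA build is parameterized the same way as the CUDA one; a hedged sketch (the arch value is a placeholder, and valid MUSA_ARCHITECTURES values should be taken from the MUSA toolchain documentation):

docker build -f llama.cpp/.devops/llama-cli-musa.Dockerfile \
    --build-arg MUSA_DOCKER_ARCH=<arch> -t llama-cli-musa llama.cpp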
    	
        llama.cpp/.devops/llama-cli-rocm.Dockerfile
    ADDED
    
@@ -0,0 +1,45 @@
ARG UBUNTU_VERSION=22.04

# This needs to generally match the container host's environment.
ARG ROCM_VERSION=5.6

# Target the ROCm build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

FROM ${BASE_ROCM_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
ARG ROCM_DOCKER_ARCH="\
    gfx803 \
    gfx900 \
    gfx906 \
    gfx908 \
    gfx90a \
    gfx1010 \
    gfx1030 \
    gfx1100 \
    gfx1101 \
    gfx1102"

COPY requirements.txt   requirements.txt
COPY requirements       requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Set the AMD GPU architectures to build for
ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
ENV GGML_HIPBLAS=1
ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++

RUN make -j$(nproc) llama-cli

ENTRYPOINT [ "/app/llama-cli" ]
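Running the ROCm image requires passing the AMD GPU device nodes through to the container; a sketch under the usual ROCm-in-Docker conventions (tag and paths illustrative):

docker run --device /dev/kfd --device /dev/dri \
    -v /path/to/models:/models llama-cli-rocm \
    -m /models/model.gguf -p "Hello"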
    	
        llama.cpp/.devops/llama-cli-vulkan.Dockerfile
    ADDED
    
@@ -0,0 +1,27 @@
ARG UBUNTU_VERSION=jammy

FROM ubuntu:$UBUNTU_VERSION AS build

# Install build tools
RUN apt update && apt install -y git build-essential cmake wget libgomp1

# Install Vulkan SDK
RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
    wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
    apt update -y && \
    apt-get install -y vulkan-sdk

# Build it
WORKDIR /app
COPY . .
RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 && \
    cmake --build build --config Release --target llama-cli

# Clean up
WORKDIR /
RUN cp /app/build/bin/llama-cli /llama-cli && \
    rm -rf /app

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/llama-cli" ]
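A sketch for the Vulkan image; offloading layers with -ngl is a standard llama-cli flag, while the /dev/dri pass-through and the layer count are assumptions about the host setup:

docker build -f llama.cpp/.devops/llama-cli-vulkan.Dockerfile -t llama-cli-vulkan llama.cpp
docker run --device /dev/dri -v /path/to/models:/models llama-cli-vulkan \
    -m /models/model.gguf -p "Hello" -ngl 33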
    	
        llama.cpp/.devops/llama-cli.Dockerfile
    ADDED
    
@@ -0,0 +1,23 @@
ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION AS build

RUN apt-get update && \
    apt-get install -y build-essential git

WORKDIR /app

COPY . .

RUN make -j$(nproc) llama-cli

FROM ubuntu:$UBUNTU_VERSION AS runtime

RUN apt-get update && \
    apt-get install -y libgomp1

COPY --from=build /app/llama-cli /llama-cli

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/llama-cli" ]
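The plain CPU image needs no special device flags; a minimal sketch (tag and paths illustrative):

docker build -f llama.cpp/.devops/llama-cli.Dockerfile -t llama-cli llama.cpp
docker run -v /path/to/models:/models llama-cli -m /models/model.gguf -p "Hello" -n 64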
    	
        llama.cpp/.devops/llama-cpp-cuda.srpm.spec
    ADDED
    
@@ -0,0 +1,83 @@
# SRPM for building from source and packaging an RPM for RPM-based distros.
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
# Built and maintained by John Boero - boeroboy@gmail.com
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal

# Notes for llama.cpp:
# 1. Tags are currently based on hash - which will not sort asciibetically.
#    We need to declare standard versioning if people want to sort latest releases.
# 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
# 3. NVIDIA's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
#    Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
#    It is up to the user to install the correct vendor-specific support.

Name:           llama.cpp-cuda
Version:        %( date "+%%Y%%m%%d" )
Release:        1%{?dist}
Summary:        CUDA-accelerated inference of LLaMA model in pure C/C++
License:        MIT
Source0:        https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
BuildRequires:  coreutils make gcc-c++ git cuda-toolkit
Requires:       cuda-toolkit
URL:            https://github.com/ggerganov/llama.cpp

%define debug_package %{nil}
%define source_date_epoch_from_changelog 0

%description
CUDA-accelerated inference for Meta's LLaMA 2 models using default options.

%prep
%setup -n llama.cpp-master

%build
make -j GGML_CUDA=1

%install
mkdir -p %{buildroot}%{_bindir}/
cp -p llama-cli %{buildroot}%{_bindir}/llama-cuda-cli
cp -p llama-server %{buildroot}%{_bindir}/llama-cuda-server
cp -p llama-simple %{buildroot}%{_bindir}/llama-cuda-simple

mkdir -p %{buildroot}/usr/lib/systemd/system
%{__cat} <<EOF  > %{buildroot}/usr/lib/systemd/system/llamacuda.service
[Unit]
Description=Llama.cpp server (CUDA build).
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target

[Service]
Type=simple
EnvironmentFile=/etc/sysconfig/llama
ExecStart=/usr/bin/llama-cuda-server $LLAMA_ARGS
ExecReload=/bin/kill -s HUP $MAINPID
Restart=no

[Install]
WantedBy=default.target
EOF

mkdir -p %{buildroot}/etc/sysconfig
%{__cat} <<EOF  > %{buildroot}/etc/sysconfig/llama
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
EOF

%clean
rm -rf %{buildroot}
rm -rf %{_builddir}/*

%files
%{_bindir}/llama-cuda-cli
%{_bindir}/llama-cuda-server
%{_bindir}/llama-cuda-simple
/usr/lib/systemd/system/llamacuda.service
%config /etc/sysconfig/llama

%pre

%post

%preun
%postun

%changelog
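A sketch of building an RPM from this spec with rpmdevtools, assuming Source0 has been fetched into the SOURCES tree first:

rpmdev-setuptree
curl -L -o ~/rpmbuild/SOURCES/master.tar.gz \
    https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
rpmbuild -ba llama.cpp/.devops/llama-cpp-cuda.srpm.spec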
    	
        llama.cpp/.devops/llama-cpp.srpm.spec
    ADDED
    
@@ -0,0 +1,85 @@
# SRPM for building from source and packaging an RPM for RPM-based distros.
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
# Built and maintained by John Boero - boeroboy@gmail.com
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal

# Notes for llama.cpp:
# 1. Tags are currently based on hash - which will not sort asciibetically.
#    We need to declare standard versioning if people want to sort latest releases.
#    In the meantime, YYYYMMDD format will be used.
# 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
# 3. NVIDIA's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
#    Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
#    It is up to the user to install the correct vendor-specific support.

Name:           llama.cpp
Version:        %( date "+%%Y%%m%%d" )
Release:        1%{?dist}
Summary:        CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
License:        MIT
Source0:        https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
BuildRequires:  coreutils make gcc-c++ git libstdc++-devel
Requires:       libstdc++
URL:            https://github.com/ggerganov/llama.cpp

%define debug_package %{nil}
%define source_date_epoch_from_changelog 0

%description
CPU inference for Meta's LLaMA 2 models using default options.
Models are not included in this package and must be downloaded separately.

%prep
%setup -n llama.cpp-master

%build
make -j

%install
mkdir -p %{buildroot}%{_bindir}/
cp -p llama-cli %{buildroot}%{_bindir}/llama-cli
cp -p llama-server %{buildroot}%{_bindir}/llama-server
cp -p llama-simple %{buildroot}%{_bindir}/llama-simple

mkdir -p %{buildroot}/usr/lib/systemd/system
%{__cat} <<EOF  > %{buildroot}/usr/lib/systemd/system/llama.service
[Unit]
Description=Llama.cpp server, CPU only (no GPU support in this build).
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target

[Service]
Type=simple
EnvironmentFile=/etc/sysconfig/llama
ExecStart=/usr/bin/llama-server $LLAMA_ARGS
ExecReload=/bin/kill -s HUP $MAINPID
Restart=no

[Install]
WantedBy=default.target
EOF

mkdir -p %{buildroot}/etc/sysconfig
%{__cat} <<EOF  > %{buildroot}/etc/sysconfig/llama
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
EOF

%clean
rm -rf %{buildroot}
rm -rf %{_builddir}/*

%files
%{_bindir}/llama-cli
%{_bindir}/llama-server
%{_bindir}/llama-simple
/usr/lib/systemd/system/llama.service
%config /etc/sysconfig/llama

%pre

%post

%preun
%postun

%changelog
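Once the RPM is installed, the bundled unit can be wired up via the sysconfig file it reads; a sketch (the model path and extra flags are illustrative):

echo 'LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin --port 8080"' | sudo tee /etc/sysconfig/llama
sudo systemctl daemon-reload
sudo systemctl enable --now llama.service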
    	
        llama.cpp/.devops/llama-server-cuda.Dockerfile
    ADDED
    
@@ -0,0 +1,43 @@
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=12.6.0
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# Target the CUDA runtime image
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# CUDA architecture to build for (defaults to all supported archs)
ARG CUDA_DOCKER_ARCH=default

RUN apt-get update && \
    apt-get install -y build-essential git cmake libcurl4-openssl-dev

WORKDIR /app

COPY . .

# Use the default CUDA archs if not specified
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
        export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
    fi && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
    cmake --build build --config Release --target llama-server -j$(nproc) && \
    mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime

RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev libgomp1 curl

COPY --from=build /app/lib/ /
COPY --from=build /app/build/bin/llama-server /llama-server

# Must be set to 0.0.0.0 so it can listen to requests from host machine
ENV LLAMA_ARG_HOST=0.0.0.0

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/llama-server" ]
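A sketch of running the CUDA server image; port 8080 matches the HEALTHCHECK above, while the tag and paths are illustrative:

docker build -f llama.cpp/.devops/llama-server-cuda.Dockerfile -t llama-server-cuda llama.cpp
docker run --gpus all -p 8080:8080 -v /path/to/models:/models \
    llama-server-cuda -m /models/model.gguf
curl -f http://localhost:8080/health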
    	
        llama.cpp/.devops/llama-server-intel.Dockerfile
    ADDED
    
@@ -0,0 +1,34 @@
ARG ONEAPI_VERSION=2025.0.0-0-devel-ubuntu22.04

FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build

ARG GGML_SYCL_F16=OFF
RUN apt-get update && \
    apt-get install -y git libcurl4-openssl-dev

WORKDIR /app

COPY . .

RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
        echo "GGML_SYCL_F16 is set" && \
        export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
    fi && \
    echo "Building with dynamic libs" && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
    cmake --build build --config Release --target llama-server

FROM intel/oneapi-basekit:$ONEAPI_VERSION AS runtime

RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev curl

COPY --from=build /app/build/bin/llama-server /llama-server

ENV LC_ALL=C.utf8
# Must be set to 0.0.0.0 so it can listen to requests from host machine
ENV LLAMA_ARG_HOST=0.0.0.0

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/llama-server" ]
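With any of these server images listening on the published port, the HTTP API can be exercised from the host; /health is the endpoint the HEALTHCHECK polls, and /completion is the standard llama-server completion route:

curl -f http://localhost:8080/health
curl http://localhost:8080/completion -d '{"prompt": "Hello", "n_predict": 16}'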
    	
        llama.cpp/.devops/llama-server-musa.Dockerfile
    ADDED
    
@@ -0,0 +1,43 @@
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG MUSA_VERSION=rc3.1.0
# Target the MUSA build image
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# Target the MUSA runtime image
ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_MUSA_DEV_CONTAINER} AS build

# MUSA architecture to build for (defaults to all supported archs)
ARG MUSA_DOCKER_ARCH=default

RUN apt-get update && \
    apt-get install -y build-essential git cmake libcurl4-openssl-dev

WORKDIR /app

COPY . .

# Use the default MUSA archs if not specified
RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
        export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
    fi && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
    cmake --build build --config Release --target llama-server -j$(nproc) && \
    mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime

RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev libgomp1 curl

COPY --from=build /app/lib/ /
COPY --from=build /app/build/bin/llama-server /llama-server

# Must be set to 0.0.0.0 so it can listen to requests from host machine
ENV LLAMA_ARG_HOST=0.0.0.0

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/llama-server" ]
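Since the image already configures the server through an environment variable (LLAMA_ARG_HOST), other settings can plausibly be overridden the same way at run time; a sketch assuming LLAMA_ARG_MODEL and LLAMA_ARG_PORT mirror their CLI counterparts in this revision:

docker run -p 9000:9000 -v /path/to/models:/models \
    -e LLAMA_ARG_MODEL=/models/model.gguf -e LLAMA_ARG_PORT=9000 \
    llama-server-musa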
    	
        llama.cpp/.devops/llama-server-rocm.Dockerfile
    ADDED
    
@@ -0,0 +1,54 @@
ARG UBUNTU_VERSION=22.04

# This needs to generally match the container host's environment.
ARG ROCM_VERSION=5.6

# Target the ROCm build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

FROM ${BASE_ROCM_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
ARG ROCM_DOCKER_ARCH="\
    gfx803 \
    gfx900 \
    gfx906 \
    gfx908 \
    gfx90a \
    gfx1010 \
    gfx1030 \
    gfx1100 \
    gfx1101 \
    gfx1102"

COPY requirements.txt   requirements.txt
COPY requirements       requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Set the AMD GPU architectures to build for
ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
ENV GGML_HIPBLAS=1
ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++
# Must be set to 0.0.0.0 so it can listen to requests from host machine
ENV LLAMA_ARG_HOST=0.0.0.0

# Enable cURL
ENV LLAMA_CURL=1
RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev curl

RUN make -j$(nproc) llama-server

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
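A run sketch combining the ROCm device pass-through with the server port mapping (tag and paths illustrative):

docker run --device /dev/kfd --device /dev/dri -p 8080:8080 \
    -v /path/to/models:/models llama-server-rocm -m /models/model.gguf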
    	
llama.cpp/.devops/llama-server-vulkan.Dockerfile
ADDED
@@ -0,0 +1,31 @@
+ARG UBUNTU_VERSION=jammy
+
+FROM ubuntu:$UBUNTU_VERSION AS build
+
+# Install build tools
+RUN apt update && apt install -y git build-essential cmake wget
+
+# Install Vulkan SDK and cURL
+RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
+    wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
+    apt update -y && \
+    apt-get install -y vulkan-sdk libcurl4-openssl-dev curl
+
+# Build it
+WORKDIR /app
+COPY . .
+RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_CURL=1 && \
+    cmake --build build --config Release --target llama-server
+
+# Clean up
+WORKDIR /
+RUN cp /app/build/bin/llama-server /llama-server && \
+    rm -rf /app
+
+ENV LC_ALL=C.utf8
+# Must be set to 0.0.0.0 so it can listen to requests from host machine
+ENV LLAMA_ARG_HOST=0.0.0.0
+
+HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
+
+ENTRYPOINT [ "/llama-server" ]
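A hedged usage sketch for this image (not part of the commit): the tag and model path are placeholders, and passing --device /dev/dri is the typical, driver-dependent way of making Vulkan-capable GPUs visible inside the container.

$ docker build -t llama-server-vulkan -f .devops/llama-server-vulkan.Dockerfile .
$ docker run --device /dev/dri -p 8080:8080 \
    -v ./models:/models llama-server-vulkan -m /models/model.gguf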
    	
llama.cpp/.devops/llama-server.Dockerfile
ADDED
@@ -0,0 +1,41 @@
+ARG UBUNTU_VERSION=22.04
+
+FROM ubuntu:$UBUNTU_VERSION AS build
+
+RUN apt-get update && \
+    apt-get install -y build-essential git cmake libcurl4-openssl-dev
+
+WORKDIR /app
+
+COPY . .
+
+
+RUN \
+    # Build multiple versions of the CPU backend
+    scripts/build-cpu.sh avx         -DGGML_AVX=ON -DGGML_AVX2=OFF && \
+    scripts/build-cpu.sh avx2        -DGGML_AVX=ON -DGGML_AVX2=ON && \
+    scripts/build-cpu.sh avx512      -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_AVX512=ON && \
+    scripts/build-cpu.sh amx         -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_AVX512=ON -DGGML_AVX_VNNI=ON -DGGML_AVX512_VNNI=ON -DGGML_AMX_TILE=ON -DGGML_AMX_INT8=ON && \
+    # Build llama-server
+    cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \
+    cmake --build build --target llama-server -j $(nproc) && \
+    # Copy the built libraries to /app/lib
+    mkdir -p /app/lib && \
+    mv libggml-cpu* /app/lib/ && \
+    find build -name "*.so" -exec cp {} /app/lib/ \;
+
+FROM ubuntu:$UBUNTU_VERSION AS runtime
+
+RUN apt-get update && \
+    apt-get install -y libcurl4-openssl-dev libgomp1 curl
+
+COPY --from=build /app/build/bin/llama-server /llama-server
+COPY --from=build /app/lib/ /
+
+ENV LC_ALL=C.utf8
+# Must be set to 0.0.0.0 so it can listen to requests from host machine
+ENV LLAMA_ARG_HOST=0.0.0.0
+
+HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
+
+ENTRYPOINT [ "/llama-server" ]
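The combination of -DGGML_BACKEND_DL=ON and the four scripts/build-cpu.sh invocations builds the CPU backend as separate loadable avx/avx2/avx512/amx variants, so a single image can select the best variant for the host CPU at load time instead of baking in one instruction set. A usage sketch (tag and model path are placeholders):

$ docker build -t llama-server -f .devops/llama-server.Dockerfile .
$ docker run -p 8080:8080 -v ./models:/models llama-server -m /models/model.gguf
$ curl -f http://localhost:8080/health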
    	
llama.cpp/.devops/nix/apps.nix
ADDED
@@ -0,0 +1,21 @@
+{
+  perSystem =
+    { config, lib, ... }:
+    {
+      apps =
+        let
+          inherit (config.packages) default;
+          binaries = [
+            "llama-cli"
+            "llama-embedding"
+            "llama-server"
+            "llama-quantize"
+          ];
+          mkApp = name: {
+            type = "app";
+            program = "${default}/bin/${name}";
+          };
+        in
+        lib.genAttrs binaries mkApp;
+    };
+}
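apps.nix turns each listed binary into a flake app, so `nix run` can launch it straight from a checkout without installing anything. A sketch, with the model path as a placeholder:

$ nix run .#llama-cli -- -m ./models/model.gguf -p "Hello"
$ nix run .#llama-server -- -m ./models/model.gguf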
    	
llama.cpp/.devops/nix/devshells.nix
ADDED
@@ -0,0 +1,52 @@
+{ inputs, ... }:
+
+{
+  perSystem =
+    {
+      config,
+      lib,
+      system,
+      ...
+    }:
+    {
+      devShells =
+        let
+          pkgs = import inputs.nixpkgs { inherit system; };
+          stdenv = pkgs.stdenv;
+          scripts = config.packages.python-scripts;
+        in
+        lib.pipe (config.packages) [
+          (lib.concatMapAttrs (
+            name: package: {
+              ${name} = pkgs.mkShell {
+                name = "${name}";
+                inputsFrom = [ package ];
+                shellHook = ''
+                  echo "Entering ${name} devShell"
+                '';
+              };
+              "${name}-extra" =
+                if (name == "python-scripts") then
+                  null
+                else
+                  pkgs.mkShell {
+                    name = "${name}-extra";
+                    inputsFrom = [
+                      package
+                      scripts
+                    ];
+                    # Extra packages that *may* be used by some scripts
+                    packages = [
+                      pkgs.python3Packages.tiktoken
+                    ];
+                    shellHook = ''
+                      echo "Entering ${name} devShell"
+                      addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib stdenv.cc.cc}/lib"
+                    '';
+                  };
+            }
+          ))
+          (lib.filterAttrs (name: value: value != null))
+        ];
+    };
+}
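The pipeline above derives one devShell per package, plus a "<name>-extra" variant that also pulls in the python-scripts dependencies (skipped for python-scripts itself and then dropped by filterAttrs). Assuming the flake exposes a package named `default`, entering the shells would look like:

$ nix develop .#default
$ nix develop .#default-extra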
    	
llama.cpp/.devops/nix/docker.nix
ADDED
@@ -0,0 +1,37 @@
+{
+  lib,
+  dockerTools,
+  buildEnv,
+  llama-cpp,
+  interactive ? true,
+  coreutils,
+}:
+
+# A tar that can be fed into `docker load`:
+#
+# $ nix build .#llamaPackages.docker
+# $ docker load < result
+
+# For details and variations cf.
+# - https://nixos.org/manual/nixpkgs/unstable/#ssec-pkgs-dockerTools-buildLayeredImage
+# - https://discourse.nixos.org/t/a-faster-dockertools-buildimage-prototype/16922
+# - https://nixery.dev/
+
+# Approximate (compressed) sizes, at the time of writing, are:
+#
+# .#llamaPackages.docker: 125M;
+# .#llamaPackagesCuda.docker: 537M;
+# .#legacyPackages.aarch64-linux.llamaPackagesXavier.docker: 415M.
+
+dockerTools.buildLayeredImage {
+  name = llama-cpp.pname;
+  tag = "latest";
+
+  contents =
+    [ llama-cpp ]
+    ++ lib.optionals interactive [
+      coreutils
+      dockerTools.binSh
+      dockerTools.caCertificates
+    ];
+}
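Following the file's own comments, the built tar loads directly into Docker. A hedged follow-up run: the image name comes from llama-cpp.pname, so it may carry a backend suffix (e.g. llama-cpp-cuda) rather than the plain name used below, and the model mount is a placeholder.

$ nix build .#llamaPackages.docker
$ docker load < result
$ docker run -p 8080:8080 -v ./models:/models llama-cpp:latest \
    llama-server -m /models/model.gguf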
    	
llama.cpp/.devops/nix/jetson-support.nix
ADDED
@@ -0,0 +1,39 @@
+{ inputs, ... }:
+{
+  perSystem =
+    {
+      config,
+      system,
+      lib,
+      pkgsCuda,
+      ...
+    }:
+    {
+      legacyPackages =
+        let
+          caps.llamaPackagesXavier = "7.2";
+          caps.llamaPackagesOrin = "8.7";
+          caps.llamaPackagesTX2 = "6.2";
+          caps.llamaPackagesNano = "5.3";
+
+          pkgsFor =
+            cap:
+            import inputs.nixpkgs {
+              inherit system;
+              config = {
+                cudaSupport = true;
+                cudaCapabilities = [ cap ];
+                cudaEnableForwardCompat = false;
+                inherit (pkgsCuda.config) allowUnfreePredicate;
+              };
+            };
+        in
+        builtins.mapAttrs (name: cap: (pkgsFor cap).callPackage ./scope.nix { }) caps;
+
+      packages = lib.optionalAttrs (system == "aarch64-linux") {
+        jetson-xavier = config.legacyPackages.llamaPackagesXavier.llama-cpp;
+        jetson-orin = config.legacyPackages.llamaPackagesOrin.llama-cpp;
+        jetson-nano = config.legacyPackages.llamaPackagesNano.llama-cpp;
+      };
+    };
+}
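Each attribute in `caps` pins a single CUDA compute capability for the matching Jetson board; on aarch64-linux three of the boards are additionally surfaced as top-level packages (TX2 remains reachable only through legacyPackages.llamaPackagesTX2). For example:

$ nix build .#jetson-orin
$ nix build .#jetson-xavier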
    	
llama.cpp/.devops/nix/nixpkgs-instances.nix
ADDED
@@ -0,0 +1,45 @@
+{ inputs, ... }:
+{
+  # The _module.args definitions are passed on to modules as arguments. E.g.
+  # the module `{ pkgs ... }: { /* config */ }` implicitly uses
+  # `_module.args.pkgs` (defined in this case by flake-parts).
+  perSystem =
+    { system, ... }:
+    {
+      _module.args = {
+        # Note: bringing up https://zimbatm.com/notes/1000-instances-of-nixpkgs
+        # again, the below creates several nixpkgs instances which the
+        # flake-centric CLI will be forced to evaluate e.g. on `nix flake show`.
+        #
+        # This is currently "slow" and "expensive", on a certain scale.
+        # This also isn't "right" in that this hinders dependency injection at
+        # the level of flake inputs. This might get removed in the foreseeable
+        # future.
+        #
+        # Note that you can use these expressions without flakes
+        # (`pkgs.callPackage ./devops/nix/scope.nix { }` is the entry point).
+
+        pkgsCuda = import inputs.nixpkgs {
+          inherit system;
+          # Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc,
+          # and ucx are built with CUDA support)
+          config.cudaSupport = true;
+          config.allowUnfreePredicate =
+            p:
+            builtins.all (
+              license:
+              license.free
+              || builtins.elem license.shortName [
+                "CUDA EULA"
+                "cuDNN EULA"
+              ]
+            ) (p.meta.licenses or [ p.meta.license ]);
+        };
+        # Ensure dependencies use ROCm consistently
+        pkgsRocm = import inputs.nixpkgs {
+          inherit system;
+          config.rocmSupport = true;
+        };
+      };
+    };
+}
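These extra nixpkgs instances back the CUDA- and ROCm-flavoured scopes used elsewhere in the flake. Judging from the attribute paths quoted in docker.nix, the CUDA scope can be built directly, e.g.:

$ nix build .#llamaPackagesCuda.llama-cpp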
    	
llama.cpp/.devops/nix/package-gguf-py.nix
ADDED
@@ -0,0 +1,36 @@
+{
+  lib,
+  llamaVersion,
+  numpy,
+  tqdm,
+  sentencepiece,
+  pyyaml,
+  poetry-core,
+  buildPythonPackage,
+  pytestCheckHook,
+}:
+
+buildPythonPackage {
+  pname = "gguf";
+  version = llamaVersion;
+  pyproject = true;
+  nativeBuildInputs = [ poetry-core ];
+  propagatedBuildInputs = [
+    numpy
+    tqdm
+    sentencepiece
+    pyyaml
+  ];
+  src = lib.cleanSource ../../gguf-py;
+  pythonImportsCheck = [
+    "numpy"
+    "gguf"
+  ];
+  nativeCheckInputs = [ pytestCheckHook ];
+  doCheck = true;
+  meta = with lib; {
+    description = "Python package for writing binary files in the GGUF format";
+    license = licenses.mit;
+    maintainers = [ maintainers.ditsuke ];
+  };
+}
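This packages the in-tree gguf-py sources, and doCheck = true means the pytest suite runs as part of the build. Assuming scope.nix exposes it under a gguf-py attribute (an assumption, not confirmed by this diff), it would be built as:

$ nix build .#llamaPackages.gguf-py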
    	
llama.cpp/.devops/nix/package.nix
ADDED
@@ -0,0 +1,246 @@
+{
+  lib,
+  glibc,
+  config,
+  stdenv,
+  runCommand,
+  cmake,
+  ninja,
+  pkg-config,
+  git,
+  mpi,
+  blas,
+  cudaPackages,
+  autoAddDriverRunpath,
+  darwin,
+  rocmPackages,
+  vulkan-headers,
+  vulkan-loader,
+  curl,
+  shaderc,
+  useBlas ?
+    builtins.all (x: !x) [
+      useCuda
+      useMetalKit
+      useRocm
+      useVulkan
+    ]
+    && blas.meta.available,
+  useCuda ? config.cudaSupport,
+  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
+  # Increases the runtime closure size by ~700M
+  useMpi ? false,
+  useRocm ? config.rocmSupport,
+  enableCurl ? true,
+  useVulkan ? false,
+  llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
+
+  # It's necessary to consistently use backendStdenv when building with CUDA support,
+  # otherwise we get libstdc++ errors downstream.
+  effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
+  enableStatic ? effectiveStdenv.hostPlatform.isStatic,
+  precompileMetalShaders ? false,
+}:
+
+let
+  inherit (lib)
+    cmakeBool
+    cmakeFeature
+    optionals
+    strings
+    ;
+
+  stdenv = throw "Use effectiveStdenv instead";
+
+  suffices =
+    lib.optionals useBlas [ "BLAS" ]
+    ++ lib.optionals useCuda [ "CUDA" ]
+    ++ lib.optionals useMetalKit [ "MetalKit" ]
+    ++ lib.optionals useMpi [ "MPI" ]
+    ++ lib.optionals useRocm [ "ROCm" ]
+    ++ lib.optionals useVulkan [ "Vulkan" ];
+
+  pnameSuffix =
+    strings.optionalString (suffices != [ ])
+      "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
+  descriptionSuffix = strings.optionalString (
+    suffices != [ ]
+  ) ", accelerated with ${strings.concatStringsSep ", " suffices}";
+
+  xcrunHost = runCommand "xcrunHost" { } ''
+    mkdir -p $out/bin
+    ln -s /usr/bin/xcrun $out/bin
+  '';
+
+  # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
+  # separately
+  darwinBuildInputs =
+    with darwin.apple_sdk.frameworks;
+    [
+      Accelerate
+      CoreVideo
+      CoreGraphics
+    ]
+    ++ optionals useMetalKit [ MetalKit ];
+
+  cudaBuildInputs = with cudaPackages; [
+    cuda_cudart
+    cuda_cccl # <nv/target>
+    libcublas
+  ];
+
+  rocmBuildInputs = with rocmPackages; [
+    clr
+    hipblas
+    rocblas
+  ];
+
+  vulkanBuildInputs = [
+    vulkan-headers
+    vulkan-loader
+    shaderc
+  ];
+in
+
+effectiveStdenv.mkDerivation (finalAttrs: {
+  pname = "llama-cpp${pnameSuffix}";
+  version = llamaVersion;
+
+  # Note: none of the files discarded here are visible in the sandbox or
+  # affect the output hash. This also means they can be modified without
+  # triggering a rebuild.
+  src = lib.cleanSourceWith {
+    filter =
+      name: type:
+      let
+        noneOf = builtins.all (x: !x);
+        baseName = baseNameOf name;
+      in
+      noneOf [
+        (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
+        (lib.hasSuffix ".md" name) # Ignore *.md changes when computing outPaths
+        (lib.hasPrefix "." baseName) # Skip hidden files and directories
+        (baseName == "flake.lock")
+      ];
+    src = lib.cleanSource ../../.;
+  };
+
+  postPatch = ''
+    substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
+      --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
+    substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
+      --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
+  '';
+
+  # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,
+  # `default.metallib` may be compiled with the Metal compiler from Xcode,
+  # and we need to escape the sandbox on macOS to access the Metal compiler.
+  # `xcrun` is used to find the path of the Metal compiler, which is variable
+  # and not on $PATH;
+  # see https://github.com/ggerganov/llama.cpp/pull/6118 for discussion
+  __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;
+
+  nativeBuildInputs =
+    [
+      cmake
+      ninja
+      pkg-config
+      git
+    ]
+    ++ optionals useCuda [
+      cudaPackages.cuda_nvcc
+
+      autoAddDriverRunpath
+    ]
+    ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ]
+    ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ];
+
+  buildInputs =
+    optionals effectiveStdenv.isDarwin darwinBuildInputs
+    ++ optionals useCuda cudaBuildInputs
+    ++ optionals useMpi [ mpi ]
+    ++ optionals useRocm rocmBuildInputs
+    ++ optionals useBlas [ blas ]
+    ++ optionals useVulkan vulkanBuildInputs
+    ++ optionals enableCurl [ curl ];
+
+  cmakeFlags =
+    [
+      (cmakeBool "LLAMA_BUILD_SERVER" true)
+      (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
+      (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
+      (cmakeBool "LLAMA_CURL" enableCurl)
+      (cmakeBool "GGML_NATIVE" false)
+      (cmakeBool "GGML_BLAS" useBlas)
+      (cmakeBool "GGML_CUDA" useCuda)
+      (cmakeBool "GGML_HIP" useRocm)
+      (cmakeBool "GGML_METAL" useMetalKit)
+      (cmakeBool "GGML_VULKAN" useVulkan)
+      (cmakeBool "GGML_STATIC" enableStatic)
+    ]
+    ++ optionals useCuda [
+      (
+        with cudaPackages.flags;
+        cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
+          builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
+        )
+      )
+    ]
+    ++ optionals useRocm [
+      (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
+      (cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets))
+    ]
+    ++ optionals useMetalKit [
+      (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
+      (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
+    ];
+
+  # Environment variables needed for ROCm
+  env = optionals useRocm {
+    ROCM_PATH = "${rocmPackages.clr}";
+    HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
+  };
+
+  # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
+  # if they haven't been added yet.
+  postInstall = ''
+    mkdir -p $out/include
+    cp $src/include/llama.h $out/include/
+  '';
+
+  meta = {
+    # Configurations we don't want even the CI to evaluate. Results in the
+    # "unsupported platform" messages. This is mostly a no-op, because
+    # cudaPackages would've refused to evaluate anyway.
+    badPlatforms = optionals useCuda lib.platforms.darwin;
+
+    # Configurations that are known to result in build failures. Can be
+    # overridden by importing Nixpkgs with `allowBroken = true`.
+    broken = (useMetalKit && !effectiveStdenv.isDarwin);
+
+    description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
+    homepage = "https://github.com/ggerganov/llama.cpp/";
+    license = lib.licenses.mit;
+
+    # Accommodates `nix run` and `lib.getExe`
+    mainProgram = "llama-cli";
+
+    # These people might respond, on a best-effort basis, if you ping them
+    # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
+    # Consider adding yourself to this list if you want to ensure this flake
+    # stays maintained and you're willing to invest your time. Do not add
+    # other people without their consent. Consider removing people after
+    # they've been unreachable for long periods of time.
+
+    # Note that lib.maintainers is defined in Nixpkgs, but you may just add
+    # an attrset following the same format as in
+    # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
+    maintainers = with lib.maintainers; [
+      philiptaron
+      SomeoneSerge
+    ];
+
+    # Extend `badPlatforms` instead
+    platforms = lib.platforms.all;
+  };
+})
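Because every backend toggle above is an ordinary function argument with a default, the flake's packages are just particular instantiations of this file, and mainProgram = "llama-cli" means `nix run` on the default package starts llama-cli. A sketch (the attribute path follows docker.nix's naming, and the prompt is a placeholder):

$ nix build .#llamaPackages.llama-cpp
$ nix run . -- -p "Hello"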
    	
llama.cpp/.devops/nix/python-scripts.nix ADDED
@@ -0,0 +1,66 @@
+{
+  lib,
+  stdenv,
+  buildPythonPackage,
+  poetry-core,
+  mkShell,
+  python3Packages,
+  gguf-py,
+}@inputs:
+
+let
+  llama-python-deps = with python3Packages; [
+    numpy
+    sentencepiece
+    transformers
+    protobuf
+    torchWithoutCuda
+    gguf-py
+    tqdm
+
+    # for scripts/compare-llama-bench.py
+    gitpython
+    tabulate
+
+    # for examples/pydantic-models-to-grammar-examples.py
+    docstring-parser
+    pydantic
+
+  ];
+
+  llama-python-test-deps = with python3Packages; [
+    # Server bench
+    matplotlib
+
+    # server tests
+    openai
+    pytest
+    prometheus-client
+  ];
+in
+
+buildPythonPackage ({
+  pname = "llama-scripts";
+  version = "0.0.0";
+  pyproject = true;
+
+  # NOTE: The files filtered out here are not visible in the build sandbox, nor
+  # do they affect the output hash. They can be modified without triggering a rebuild.
+  src = lib.cleanSourceWith {
+    filter =
+      name: type:
+      let
+        any = builtins.any (x: x);
+        baseName = builtins.baseNameOf name;
+      in
+      any [
+        (lib.hasSuffix ".py" name)
+        (baseName == "README.md")
+        (baseName == "pyproject.toml")
+      ];
+    src = lib.cleanSource ../../.;
+  };
+  nativeBuildInputs = [ poetry-core ];
+  nativeCheckInputs = llama-python-test-deps;
+  dependencies = llama-python-deps;
+})
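Because this is an ordinary `buildPythonPackage` call, the dependency lists above can be extended without editing the file. A hedged sketch using the standard Nixpkgs `overridePythonAttrs` mechanism (the `llamaPackages` binding and the `safetensors` dependency are illustrative, not part of this diff; `python3Packages` is assumed to be in scope):

```nix
# Hypothetical: extend the runtime dependencies of the scripts package.
llamaPackages.python-scripts.overridePythonAttrs (old: {
  dependencies = old.dependencies ++ [ python3Packages.safetensors ];
})
```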
    	
llama.cpp/.devops/nix/scope.nix ADDED
@@ -0,0 +1,41 @@
+{
+  lib,
+  newScope,
+  python3,
+  llamaVersion ? "0.0.0",
+}:
+
+let
+  pythonPackages = python3.pkgs;
+  buildPythonPackage = pythonPackages.buildPythonPackage;
+  numpy = pythonPackages.numpy;
+  tqdm = pythonPackages.tqdm;
+  sentencepiece = pythonPackages.sentencepiece;
+  pyyaml = pythonPackages.pyyaml;
+  poetry-core = pythonPackages.poetry-core;
+  pytestCheckHook = pythonPackages.pytestCheckHook;
+in
+
+# We're using `makeScope` instead of just writing out an attrset
+# because it allows users to apply overlays later using `overrideScope'`.
+# Cf. https://noogle.dev/f/lib/makeScope
+
+lib.makeScope newScope (self: {
+  inherit llamaVersion;
+  gguf-py = self.callPackage ./package-gguf-py.nix {
+    inherit
+      buildPythonPackage
+      numpy
+      tqdm
+      sentencepiece
+      poetry-core
+      pyyaml
+      pytestCheckHook
+      ;
+  };
+  python-scripts = self.callPackage ./python-scripts.nix { inherit buildPythonPackage poetry-core; };
+  llama-cpp = self.callPackage ./package.nix { };
+  docker = self.callPackage ./docker.nix { };
+  docker-min = self.callPackage ./docker.nix { interactive = false; };
+  sif = self.callPackage ./sif.nix { };
+})
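The `makeScope` comment above is the practical payoff of this file: a consumer can swap out any package in the scope and every dependent package follows. A minimal sketch of such an overlay (assuming the scope above is bound to a hypothetical `llamaPackages`; `overrideScope'` itself is the standard Nixpkgs mechanism the comment refers to):

```nix
# Hypothetical downstream overlay: rebuild llama-cpp from a patched source
# tree; docker, docker-min, and sif all pick up the overridden build.
llamaPackages.overrideScope' (final: prev: {
  llama-cpp = prev.llama-cpp.overrideAttrs (old: {
    src = ./my-patched-llama.cpp;
  });
})
```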
    	
llama.cpp/.devops/nix/sif.nix ADDED
@@ -0,0 +1,27 @@
+{
+  lib,
+  singularity-tools,
+  llama-cpp,
+  bashInteractive,
+  interactive ? false,
+}:
+
+let
+  optionalInt = cond: x: if cond then x else 0;
+in
+singularity-tools.buildImage rec {
+  inherit (llama-cpp) name;
+  contents = [ llama-cpp ] ++ lib.optionals interactive [ bashInteractive ];
+
+  # These are excessive (but safe) for most variants. Building singularity
+  # images requires superuser privileges, so we build them inside a VM in a
+  # writable image of pre-determined size.
+  #
+  # ROCm is currently affected by https://github.com/NixOS/nixpkgs/issues/276846
+  #
+  # Expected image sizes:
+  # - cpu/blas: 150M,
+  # - cuda, all gencodes: 560M,
+  diskSize = 4096 + optionalInt llama-cpp.useRocm 16384;
+  memSize = diskSize;
+}
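Since `interactive` is just a function argument with a default of `false`, an image that also bundles a shell is one `.override` away (available because scope.nix instantiates this file via `callPackage`). A sketch, again assuming the scope is bound to a hypothetical `llamaPackages`:

```nix
# Hypothetical: build a SIF image that includes an interactive bash,
# mirroring the docker vs docker-min split in scope.nix.
llamaPackages.sif.override { interactive = true; }
```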
    	
llama.cpp/.devops/tools.sh ADDED
@@ -0,0 +1,41 @@
+#!/bin/bash
+set -e
+
+# Read the first argument into a variable
+arg1="$1"
+
+# Shift the arguments to remove the first one
+shift
+
+if [[ "$arg1" == '--convert' || "$arg1" == '-c' ]]; then
+    python3 ./convert_hf_to_gguf.py "$@"
+elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
+    ./llama-quantize "$@"
+elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
+    ./llama-cli "$@"
+elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
+    echo "Converting PTH to GGML..."
+    for i in `ls $1/$2/ggml-model-f16.bin*`; do
+        if [ -f "${i/f16/q4_0}" ]; then
+            echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
+        else
+            echo "Converting PTH to GGML: $i into ${i/f16/q4_0}..."
+            ./llama-quantize "$i" "${i/f16/q4_0}" q4_0
+        fi
+    done
+elif [[ "$arg1" == '--server' || "$arg1" == '-s' ]]; then
+    ./llama-server "$@"
+else
+    echo "Unknown command: $arg1"
+    echo "Available commands: "
+    echo "  --run (-r): Run a model previously converted into ggml"
+    echo "              ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -n 512"
+    echo "  --convert (-c): Convert a llama model into ggml"
+    echo "              ex: --outtype f16 \"/models/7B/\" "
+    echo "  --quantize (-q): Optimize with quantization process ggml"
+    echo "              ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2"
+    echo "  --all-in-one (-a): Execute --convert & --quantize"
+    echo "              ex: \"/models/\" 7B"
+    echo "  --server (-s): Run a model on the server"
+    echo "              ex: -m /models/7B/ggml-model-q4_0.bin -c 2048 -ngl 43 -mg 1 --port 8080"
+fi
    	
llama.cpp/.dockerignore ADDED
@@ -0,0 +1,20 @@
+*.o
+*.a
+.cache/
+# Do not ignore .git directory, otherwise the reported build number will always be 0
+.github/
+.gitignore
+.vs/
+.vscode/
+.DS_Store
+
+build*/
+
+models/*
+
+/llama-cli
+/llama-quantize
+
+arm_neon.h
+compile_commands.json
+Dockerfile
    	
llama.cpp/.ecrc ADDED
@@ -0,0 +1,6 @@
+{
+  "Exclude": ["^\\.gitmodules$", "stb_image\\.h"],
+  "Disable": {
+    "IndentSize": true
+  }
+}
    	
llama.cpp/.editorconfig ADDED
@@ -0,0 +1,42 @@
+# https://EditorConfig.org
+
+# Top-most EditorConfig file
+root = true
+
+# Unix-style newlines with a newline ending every file, utf-8 charset
+[*]
+end_of_line = lf
+insert_final_newline = true
+trim_trailing_whitespace = true
+charset = utf-8
+indent_style = space
+indent_size = 4
+
+[Makefile]
+indent_style = tab
+
+[scripts/*.mk]
+indent_style = tab
+
+[prompts/*.txt]
+insert_final_newline = unset
+
+[examples/server/public/*]
+indent_size = 2
+
+[examples/server/public/deps_*]
+trim_trailing_whitespace = unset
+indent_style = unset
+indent_size = unset
+
+[examples/server/deps_*]
+trim_trailing_whitespace = unset
+indent_style = unset
+indent_size = unset
+
+[examples/llama.swiftui/llama.swiftui.xcodeproj/*]
+indent_style = tab
+
+[examples/cvector-generator/*.txt]
+trim_trailing_whitespace = unset
+insert_final_newline = unset
    	
llama.cpp/.flake8 ADDED
@@ -0,0 +1,17 @@
+[flake8]
+max-line-length = 125
+ignore = E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503
+exclude =
+    # Do not traverse examples
+    examples,
+    # Do not include package initializers
+    __init__.py,
+    # No need to traverse our git directory
+    .git,
+    # There's no value in checking cache directories
+    __pycache__,
+    # No need to include the build path
+    build,
+    # This contains builds that we don't want to check
+    dist  # This is generated with `python build .` for package releases
+# max-complexity = 10
    	
llama.cpp/.github/ISSUE_TEMPLATE/010-bug-compilation.yml ADDED
@@ -0,0 +1,77 @@
+name: Bug (compilation)
+description: Something goes wrong when trying to compile llama.cpp.
+title: "Compile bug: "
+labels: ["bug-unconfirmed", "compilation"]
+body:
+  - type: markdown
+    attributes:
+      value: >
+        Thanks for taking the time to fill out this bug report!
+        This issue template is intended for bug reports where the compilation of llama.cpp fails.
+        Before opening an issue, please confirm that the compilation still fails with `-DGGML_CCACHE=OFF`.
+        If the compilation succeeds with ccache disabled you should be able to permanently fix the issue
+        by clearing `~/.cache/ccache` (on Linux).
+  - type: textarea
+    id: commit
+    attributes:
+      label: Git commit
+      description: Which commit are you trying to compile?
+      placeholder: |
+        $git rev-parse HEAD
+        84a07a17b1b08cf2b9747c633a2372782848a27f
+    validations:
+      required: true
+  - type: dropdown
+    id: operating-system
+    attributes:
+      label: Operating systems
+      description: Which operating systems do you know to be affected?
+      multiple: true
+      options:
+        - Linux
+        - Mac
+        - Windows
+        - BSD
+        - Other? (Please let us know in description)
+    validations:
+      required: true
+  - type: dropdown
+    id: backends
+    attributes:
+        label: GGML backends
+        description: Which GGML backends do you know to be affected?
+        options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan]
+        multiple: true
+    validations:
+      required: true
+  - type: textarea
+    id: info
+    attributes:
+      label: Problem description & steps to reproduce
+      description: >
+        Please give us a summary of the problem and tell us how to reproduce it.
+        If you can narrow down the bug to specific compile flags, that information would be very much appreciated by us.
+      placeholder: >
+        I'm trying to compile llama.cpp with CUDA support on a fresh install of Ubuntu and get error XY.
+        Here are the exact commands that I used: ...
+    validations:
+      required: true
+  - type: textarea
+    id: first_bad_commit
+    attributes:
+      label: First Bad Commit
+      description: >
+        If the bug was not present on an earlier version: when did it start appearing?
+        If possible, please do a git bisect and identify the exact commit that introduced the bug.
+    validations:
+      required: false
+  - type: textarea
+    id: logs
+    attributes:
+      label: Relevant log output
+      description: >
+          Please copy and paste any relevant log output, including the command that you entered and any generated text.
+          This will be automatically formatted into code, so no need for backticks.
+      render: shell
+    validations:
+      required: true
    	
llama.cpp/.github/ISSUE_TEMPLATE/011-bug-results.yml ADDED
@@ -0,0 +1,101 @@
+name: Bug (model use)
+description: Something goes wrong when using a model (in general, not specific to a single llama.cpp module).
+title: "Eval bug: "
+labels: ["bug-unconfirmed", "model evaluation"]
+body:
+  - type: markdown
+    attributes:
+      value: >
+        Thanks for taking the time to fill out this bug report!
+        This issue template is intended for bug reports where the model evaluation results
+        (i.e. the generated text) are incorrect or llama.cpp crashes during model evaluation.
+        If you encountered the issue while using an external UI (e.g. ollama),
+        please reproduce your issue using one of the examples/binaries in this repository.
+        The `llama-cli` binary can be used for simple and reproducible model inference.
+  - type: textarea
+    id: version
+    attributes:
+      label: Name and Version
+      description: Which version of our software are you running? (use `--version` to get a version string)
+      placeholder: |
+        $./llama-cli --version
+        version: 2999 (42b4109e)
+        built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
+    validations:
+      required: true
+  - type: dropdown
+    id: operating-system
+    attributes:
+      label: Operating systems
+      description: Which operating systems do you know to be affected?
+      multiple: true
+      options:
+        - Linux
+        - Mac
+        - Windows
+        - BSD
+        - Other? (Please let us know in description)
+    validations:
+      required: true
+  - type: dropdown
+    id: backends
+    attributes:
+        label: GGML backends
+        description: Which GGML backends do you know to be affected?
+        options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan]
+        multiple: true
+    validations:
+      required: true
+  - type: textarea
+    id: hardware
+    attributes:
+      label: Hardware
+      description: Which CPUs/GPUs are you using?
+      placeholder: >
+        e.g. Ryzen 5950X + 2x RTX 4090
+    validations:
+      required: true
+  - type: textarea
+    id: model
+    attributes:
+      label: Models
+      description: >
+        Which model(s) at which quantization were you using when encountering the bug?
+        If you downloaded a GGUF file off of Huggingface, please provide a link.
+      placeholder: >
+        e.g. Meta LLaMA 3.1 Instruct 8b q4_K_M
+    validations:
+      required: false
+  - type: textarea
+    id: info
+    attributes:
+      label: Problem description & steps to reproduce
+      description: >
+        Please give us a summary of the problem and tell us how to reproduce it.
+        If you can narrow down the bug to specific hardware, compile flags, or command line arguments,
+        that information would be very much appreciated by us.
+      placeholder: >
+        e.g. when I run llama-cli with -ngl 99 I get garbled outputs.
+        When I use -ngl 0 it works correctly.
+        Here are the exact commands that I used: ...
+    validations:
+      required: true
+  - type: textarea
+    id: first_bad_commit
+    attributes:
+      label: First Bad Commit
+      description: >
+        If the bug was not present on an earlier version: when did it start appearing?
+        If possible, please do a git bisect and identify the exact commit that introduced the bug.
+    validations:
+      required: false
+  - type: textarea
+    id: logs
+    attributes:
+      label: Relevant log output
+      description: >
+          Please copy and paste any relevant log output, including the command that you entered and any generated text.
+          This will be automatically formatted into code, so no need for backticks.
+      render: shell
+    validations:
+      required: true
    	
        llama.cpp/.github/ISSUE_TEMPLATE/019-bug-misc.yml
    ADDED
    
    | 
         @@ -0,0 +1,81 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            name: Bug (misc.)
         
     | 
| 2 | 
         
            +
            description: Something is not working the way it should (and it's not covered by any of the above cases).
         
     | 
| 3 | 
         
            +
            title: "Misc. bug: "
         
     | 
| 4 | 
         
            +
            labels: ["bug-unconfirmed"]
         
     | 
| 5 | 
         
            +
            body:
         
     | 
| 6 | 
         
            +
              - type: markdown
         
     | 
| 7 | 
         
            +
                attributes:
         
     | 
| 8 | 
         
            +
                  value: >
         
     | 
| 9 | 
         
            +
                    Thanks for taking the time to fill out this bug report!
         
     | 
| 10 | 
         
            +
                    This issue template is intended for miscellaneous bugs that don't fit into any other category.
         
     | 
| 11 | 
         
            +
                    If you encountered the issue while using an external UI (e.g. ollama),
         
     | 
| 12 | 
         
            +
                    please reproduce your issue using one of the examples/binaries in this repository.
         
     | 
| 13 | 
         
            +
              - type: textarea
         
     | 
| 14 | 
         
            +
                id: version
         
     | 
| 15 | 
         
            +
                attributes:
         
     | 
| 16 | 
         
            +
                  label: Name and Version
         
     | 
| 17 | 
         
            +
                  description: Which version of our software is affected? (You can use `--version` to get a version string.)
         
     | 
| 18 | 
         
            +
                  placeholder: |
         
     | 
| 19 | 
         
            +
                    $./llama-cli --version
         
     | 
| 20 | 
         
            +
                    version: 2999 (42b4109e)
         
     | 
| 21 | 
         
            +
                    built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
         
     | 
| 22 | 
         
            +
                validations:
         
     | 
| 23 | 
         
            +
                  required: true
         
     | 
| 24 | 
         
            +
              - type: dropdown
         
     | 
| 25 | 
         
            +
                id: operating-system
         
     | 
| 26 | 
         
            +
                attributes:
         
     | 
| 27 | 
         
            +
                  label: Operating systems
         
     | 
| 28 | 
         
            +
                  description: Which operating systems do you know to be affected?
         
     | 
| 29 | 
         
            +
                  multiple: true
         
     | 
| 30 | 
         
            +
                  options:
         
     | 
| 31 | 
         
            +
                    - Linux
         
     | 
| 32 | 
         
            +
                    - Mac
         
     | 
| 33 | 
         
            +
                    - Windows
         
     | 
| 34 | 
         
            +
                    - BSD
         
+        - Other? (Please let us know in description)
+    validations:
+      required: false
+  - type: dropdown
+    id: module
+    attributes:
+      label: Which llama.cpp modules do you know to be affected?
+      multiple: true
+      options:
+        - Documentation/Github
+        - libllama (core library)
+        - llama-cli
+        - llama-server
+        - llama-bench
+        - llama-quantize
+        - Python/Bash scripts
+        - Test code
+        - Other (Please specify in the next section)
+    validations:
+      required: false
+  - type: textarea
+    id: info
+    attributes:
+      label: Problem description & steps to reproduce
+      description: >
+        Please give us a summary of the problem and tell us how to reproduce it (if applicable).
+    validations:
+      required: true
+  - type: textarea
+    id: first_bad_commit
+    attributes:
+      label: First Bad Commit
+      description: >
+        If the bug was not present on an earlier version and it's not trivial to track down: when did it start appearing?
+        If possible, please do a git bisect and identify the exact commit that introduced the bug.
+    validations:
+      required: false
+  - type: textarea
+    id: logs
+    attributes:
+      label: Relevant log output
+      description: >
+        If applicable, please copy and paste any relevant log output, including the command that you entered and any generated text.
+        This will be automatically formatted into code, so no need for backticks.
+      render: shell
+    validations:
+      required: false
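Every issue template in this commit is a GitHub issue form built from the same element shape: a widget `type`, an `id` that keys the submitted value, display `attributes`, and optional `validations`. A minimal sketch for orientation (the `name` and `id` values below are illustrative, not taken from this diff):

```yaml
# Minimal GitHub issue form; illustrative sketch, not part of this commit.
name: Example form
description: Shows the element shape shared by the templates in this diff.
body:
  - type: textarea         # widget kind: markdown, textarea, input, dropdown, checkboxes
    id: details            # key the submitted value is reported under
    attributes:
      label: Details       # heading shown to the reporter
      render: shell        # wrap the submitted text in a fenced code block
    validations:
      required: true       # block submission while the field is empty
```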
    	
llama.cpp/.github/ISSUE_TEMPLATE/020-enhancement.yml
ADDED
@@ -0,0 +1,51 @@
+name: Enhancement
+description: Used to request enhancements for llama.cpp.
+title: "Feature Request: "
+labels: ["enhancement"]
+body:
+  - type: markdown
+    attributes:
+      value: |
+        [Please post your idea first in Discussion if there is not yet a consensus for this enhancement request. This will help to keep this issue tracker focused on enhancements that the community has agreed need to be implemented.](https://github.com/ggerganov/llama.cpp/discussions/categories/ideas)
+
+  - type: checkboxes
+    id: prerequisites
+    attributes:
+      label: Prerequisites
+      description: Please confirm the following before submitting your enhancement request.
+      options:
+        - label: I am running the latest code. Mention the version if possible as well.
+          required: true
+        - label: I carefully followed the [README.md](https://github.com/ggerganov/llama.cpp/blob/master/README.md).
+          required: true
+        - label: I searched using keywords relevant to my issue to make sure that I am creating a new issue that is not already open (or closed).
+          required: true
+        - label: I reviewed the [Discussions](https://github.com/ggerganov/llama.cpp/discussions), and have a new and useful enhancement to share.
+          required: true
+
+  - type: textarea
+    id: feature-description
+    attributes:
+      label: Feature Description
+      description: Please provide a detailed written description of what you were trying to do, and what you expected `llama.cpp` to do as an enhancement.
+      placeholder: Detailed description of the enhancement
+    validations:
+      required: true
+
+  - type: textarea
+    id: motivation
+    attributes:
+      label: Motivation
+      description: Please provide a detailed written description of reasons why this feature is necessary and how it is useful to `llama.cpp` users.
+      placeholder: Explanation of why this feature is needed and its benefits
+    validations:
+      required: true
+
+  - type: textarea
+    id: possible-implementation
+    attributes:
+      label: Possible Implementation
+      description: If you have an idea as to how it can be implemented, please write a detailed description. Feel free to give links to external sources or share visuals that might be helpful to understand the details better.
+      placeholder: Detailed description of potential implementation
+    validations:
+      required: false
llama.cpp/.github/ISSUE_TEMPLATE/030-research.yml
ADDED
@@ -0,0 +1,52 @@
+name: Research
+description: Track new technical research area.
+title: "Research: "
+labels: ["research 🔬"]
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Don't forget to check for any [duplicate research issue tickets](https://github.com/ggerganov/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3A%22research+%F0%9F%94%AC%22)
+
+  - type: checkboxes
+    id: research-stage
+    attributes:
+      label: Research Stage
+      description: Track the general state of this research ticket
+      options:
+        - label: Background Research (Let's try to avoid reinventing the wheel)
+        - label: Hypothesis Formed (How do you think this will work, and what will its effect be?)
+        - label: Strategy / Implementation Forming
+        - label: Analysis of results
+        - label: Debrief / Documentation (So people in the future can learn from us)
+
+  - type: textarea
+    id: background
+    attributes:
+      label: Previous existing literature and research
+      description: What's the current state of the art, and what's the motivation for this research?
+
+  - type: textarea
+    id: hypothesis
+    attributes:
+      label: Hypothesis
+      description: How do you think this will work, and what will its effect be?
+
+  - type: textarea
+    id: implementation
+    attributes:
+      label: Implementation
+      description: Got an approach? e.g. a PR ready to go?
+
+  - type: textarea
+    id: analysis
+    attributes:
+      label: Analysis
+      description: How does the proposed implementation behave?
+
+  - type: textarea
+    id: logs
+    attributes:
+      label: Relevant log output
+      description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
+      render: shell
    	
llama.cpp/.github/ISSUE_TEMPLATE/040-refactor.yml
ADDED
@@ -0,0 +1,28 @@
+name: Refactor (Maintainers)
+description: Used to track refactoring opportunities.
+title: "Refactor: "
+labels: ["refactor"]
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Don't forget to [check for existing refactor issue tickets](https://github.com/ggerganov/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3Arefactoring) in case it's already covered.
+        You may also want to check the [pull request refactor label](https://github.com/ggerganov/llama.cpp/pulls?q=is%3Aopen+is%3Apr+label%3Arefactoring) for duplicates.
+
+  - type: textarea
+    id: background-description
+    attributes:
+      label: Background Description
+      description: Please provide a detailed written description of the pain points you are trying to solve.
+      placeholder: Detailed description behind your motivation to request a refactor
+    validations:
+      required: true
+
+  - type: textarea
+    id: possible-approaches
+    attributes:
+      label: Possible Refactor Approaches
+      description: If you have some idea of possible approaches to solve this problem, describe it here. You may want to make it a todo list.
+      placeholder: Your idea of possible refactoring opportunities/approaches
+    validations:
+      required: false
    	
llama.cpp/.github/ISSUE_TEMPLATE/config.yml
ADDED
@@ -0,0 +1,11 @@
+blank_issues_enabled: true
+contact_links:
+  - name: Got an idea?
+    url: https://github.com/ggerganov/llama.cpp/discussions/categories/ideas
+    about: Pop it there. It may then become an enhancement ticket.
+  - name: Got a question?
+    url: https://github.com/ggerganov/llama.cpp/discussions/categories/q-a
+    about: Ask a question there!
+  - name: Want to contribute?
+    url: https://github.com/ggerganov/llama.cpp/wiki/contribute
+    about: Head to the contribution guide page of the wiki for areas you can help with
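In `config.yml` above, `blank_issues_enabled: true` keeps the plain blank-issue option available alongside the templates. Flipping that key is how a repository would force reporters through the forms; a hypothetical variant (not what this commit does):

```yaml
# Hypothetical variant: this commit actually keeps blank issues enabled.
blank_issues_enabled: false   # hide the "Open a blank issue" escape hatch
contact_links:
  - name: Got a question?
    url: https://github.com/ggerganov/llama.cpp/discussions/categories/q-a
    about: Ask a question there!
```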
    	
llama.cpp/.github/labeler.yml
ADDED
@@ -0,0 +1,86 @@
+# https://github.com/actions/labeler
+Kompute:
+    - changed-files:
+        - any-glob-to-any-file:
+            - ggml/include/ggml-kompute.h
+            - ggml/src/ggml-kompute/**
+            - README-kompute.md
+Apple Metal:
+    - changed-files:
+        - any-glob-to-any-file:
+            - ggml/include/ggml-metal.h
+            - ggml/src/ggml-metal/**
+            - README-metal.md
+SYCL:
+    - changed-files:
+        - any-glob-to-any-file:
+            - ggml/include/ggml-sycl.h
+            - ggml/src/ggml-sycl/**
+            - docs/backend/SYCL.md
+            - examples/sycl/**
+Nvidia GPU:
+    - changed-files:
+        - any-glob-to-any-file:
+            - ggml/include/ggml-cuda.h
+            - ggml/src/ggml-cuda/**
+Vulkan:
+    - changed-files:
+        - any-glob-to-any-file:
+            - ggml/include/ggml-vulkan.h
+            - ggml/src/ggml-vulkan/**
+documentation:
+    - changed-files:
+        - any-glob-to-any-file:
+            - docs/**
+            - media/**
+testing:
+    - changed-files:
+        - any-glob-to-any-file:
+            - tests/**
+build:
+    - changed-files:
+        - any-glob-to-any-file:
+            - cmake/**
+            - CMakeLists.txt
+            - CMakePresets.json
+examples:
+    - changed-files:
+        - any-glob-to-any-file: examples/**
+devops:
+    - changed-files:
+        - any-glob-to-any-file:
+            - .devops/**
+            - .github/**
+            - ci/**
+python:
+    - changed-files:
+        - any-glob-to-any-file:
+            - "**/*.py"
+            - requirements/**
+            - gguf-py/**
+            - .flake8
+script:
+    - changed-files:
+        - any-glob-to-any-file:
+            - scripts/**
+android:
+    - changed-files:
+        - any-glob-to-any-file:
+            - examples/llama.android/**
+server:
+    - changed-files:
+        - any-glob-to-any-file:
+            - examples/server/**
+ggml:
+    - changed-files:
+        - any-glob-to-any-file:
+            - ggml/**
+nix:
+    - changed-files:
+        - any-glob-to-any-file:
+            - "**/*.nix"
+            - .github/workflows/nix-*.yml
+            - .devops/nix/nixpkgs-instances.nix
+embedding:
+    - changed-files:
+        - any-glob-to-any-file: examples/embedding/
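`labeler.yml` is inert by itself: the `any-glob-to-any-file` matchers above use the `actions/labeler` v5 configuration syntax, and they take effect only when a workflow invokes that action. A minimal companion workflow, sketched from the action's documentation (this commit's actual workflow, if any, may differ):

```yaml
# Sketch of a workflow that would apply the labels defined above.
name: "Pull Request Labeler"
on: pull_request_target         # runs with base-repo permissions, so fork PRs can be labeled

jobs:
  labeler:
    permissions:
      contents: read            # read .github/labeler.yml from the default branch
      pull-requests: write      # add labels to the pull request
    runs-on: ubuntu-latest
    steps:
      - uses: actions/labeler@v5
```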
    	
llama.cpp/.github/pull_request_template.md
ADDED
@@ -0,0 +1 @@
+*Make sure to read the [contributing guidelines](https://github.com/ggerganov/llama.cpp/blob/master/CONTRIBUTING.md) before submitting a PR*
    	
        llama.cpp/.github/workflows/bench.yml.disabled
    ADDED
    
    | 
         @@ -0,0 +1,315 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            # TODO: there have been some issues with the workflow, so disabling for now
         
     | 
| 2 | 
         
            +
            #       https://github.com/ggerganov/llama.cpp/issues/7893
         
     | 
| 3 | 
         
            +
            #
         
     | 
| 4 | 
         
            +
            # Benchmark
         
     | 
| 5 | 
         
            +
            name: Benchmark
         
     | 
| 6 | 
         
            +
             
     | 
| 7 | 
         
            +
            on:
         
     | 
| 8 | 
         
            +
              workflow_dispatch:
         
     | 
| 9 | 
         
            +
                inputs:
         
     | 
| 10 | 
         
            +
                  gpu-series:
         
     | 
| 11 | 
         
            +
                    description: 'Azure GPU series to run with'
         
     | 
| 12 | 
         
            +
                    required: true
         
     | 
| 13 | 
         
            +
                    type: choice
         
     | 
| 14 | 
         
            +
                    options:
         
     | 
| 15 | 
         
            +
                      - Standard_NC4as_T4_v3
         
     | 
| 16 | 
         
            +
                      - Standard_NC24ads_A100_v4
         
     | 
| 17 | 
         
            +
                      - Standard_NC80adis_H100_v5
         
     | 
| 18 | 
         
            +
                  sha:
         
     | 
| 19 | 
         
            +
                    description: 'Commit SHA1 to build'
         
     | 
| 20 | 
         
            +
                    required: false
         
     | 
| 21 | 
         
            +
                    type: string
         
     | 
| 22 | 
         
            +
                  duration:
         
     | 
| 23 | 
         
            +
                    description: 'Duration of the bench'
         
     | 
| 24 | 
         
            +
                    type: string
         
     | 
| 25 | 
         
            +
                    default: 10m
         
     | 
| 26 | 
         
            +
             
     | 
| 27 | 
         
            +
              push:
         
     | 
| 28 | 
         
            +
                branches:
         
     | 
| 29 | 
         
            +
                  - master
         
     | 
| 30 | 
         
            +
                paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
         
     | 
| 31 | 
         
            +
              pull_request_target:
         
     | 
| 32 | 
         
            +
                types: [opened, synchronize, reopened]
         
     | 
| 33 | 
         
            +
                paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
         
     | 
| 34 | 
         
            +
              schedule:
         
     | 
| 35 | 
         
            +
                -  cron: '04 2 * * *'
         
     | 
| 36 | 
         
            +
             
     | 
| 37 | 
         
            +
            concurrency:
         
     | 
| 38 | 
         
            +
              group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}-${{ github.event.inputs.sha }}
         
     | 
| 39 | 
         
            +
              cancel-in-progress: true
         
     | 
| 40 | 
         
            +
             
     | 
| 41 | 
         
            +
            jobs:
         
     | 
| 42 | 
         
            +
              bench-server-baseline:
         
     | 
| 43 | 
         
            +
                runs-on: Standard_NC4as_T4_v3
         
     | 
| 44 | 
         
            +
                env:
         
     | 
| 45 | 
         
            +
                  RUNNER_LABEL: Standard_NC4as_T4_v3 # FIXME Do not find a way to not duplicate it
         
     | 
| 46 | 
         
            +
                  N_USERS: 8
         
     | 
| 47 | 
         
            +
                  DURATION: 10m
         
     | 
| 48 | 
         
            +
             
     | 
| 49 | 
         
            +
                strategy:
         
     | 
| 50 | 
         
            +
                  matrix:
         
     | 
| 51 | 
         
            +
                    model: [phi-2]
         
     | 
| 52 | 
         
            +
                    ftype: [q4_0, q8_0, f16]
         
     | 
| 53 | 
         
            +
                    include:
         
     | 
| 54 | 
         
            +
                      - model: phi-2
         
     | 
| 55 | 
         
            +
                        ftype: q4_0
         
     | 
| 56 | 
         
            +
                        pr_comment_enabled: "true"
         
     | 
| 57 | 
         
            +
             
     | 
| 58 | 
         
            +
                if: |
         
     | 
| 59 | 
         
            +
                  inputs.gpu-series == 'Standard_NC4as_T4_v3'
         
     | 
| 60 | 
         
            +
                  || (
         
     | 
| 61 | 
         
            +
                    github.event_name == 'schedule'
         
     | 
| 62 | 
         
            +
                    && github.ref_name == 'master'
         
     | 
| 63 | 
         
            +
                    && github.repository_owner == 'ggerganov'
         
     | 
| 64 | 
         
            +
                  )
         
     | 
| 65 | 
         
            +
                  || github.event_name == 'pull_request_target'
         
     | 
| 66 | 
         
            +
                  || (
         
     | 
| 67 | 
         
            +
                    github.event_name == 'push'
         
     | 
| 68 | 
         
            +
                    && github.event.ref == 'refs/heads/master'
         
     | 
| 69 | 
         
            +
                    && github.repository_owner == 'ggerganov'
         
     | 
| 70 | 
         
            +
                  )
         
     | 
| 71 | 
         
            +
                steps:
         
     | 
| 72 | 
         
            +
                  - name: Clone
         
     | 
| 73 | 
         
            +
                    id: checkout
         
     | 
| 74 | 
         
            +
                    uses: actions/checkout@v4
         
     | 
| 75 | 
         
            +
                    with:
         
     | 
| 76 | 
         
            +
                      fetch-depth: 0
         
     | 
| 77 | 
         
            +
                      ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
         
     | 
| 78 | 
         
            +
             
     | 
| 79 | 
         
            +
                  - name: Install python env
         
     | 
| 80 | 
         
            +
                    id: pipenv
         
     | 
| 81 | 
         
            +
                    run: |
         
     | 
| 82 | 
         
            +
                      cd examples/server/bench
         
     | 
| 83 | 
         
            +
                      python3 -m venv venv
         
     | 
| 84 | 
         
            +
                      source venv/bin/activate
         
     | 
| 85 | 
         
            +
                      pip install -r requirements.txt
         
     | 
| 86 | 
         
            +
             
     | 
| 87 | 
         
            +
                  - name: Prometheus
         
     | 
| 88 | 
         
            +
                    id: install_prometheus
         
     | 
| 89 | 
         
            +
                    run: |
         
     | 
| 90 | 
         
            +
                      wget --quiet https://github.com/prometheus/prometheus/releases/download/v2.51.0/prometheus-2.51.0.linux-amd64.tar.gz
         
     | 
| 91 | 
         
            +
                      tar xzf prometheus*.tar.gz --strip-components=1
         
     | 
| 92 | 
         
            +
                      ./prometheus --config.file=examples/server/bench/prometheus.yml &
         
     | 
| 93 | 
         
            +
                      while ! nc -z localhost 9090; do
         
     | 
| 94 | 
         
            +
                        sleep 0.1
         
     | 
| 95 | 
         
            +
                      done
         
     | 
| 96 | 
         
            +
             
     | 
| 97 | 
         
            +
                  - name: Set up Go
         
     | 
| 98 | 
         
            +
                    uses: actions/setup-go@v5
         
     | 
| 99 | 
         
            +
                    with:
         
     | 
| 100 | 
         
            +
                      go-version: '1.21'
         
     | 
| 101 | 
         
            +
             
     | 
| 102 | 
         
            +
                  - name: Install k6 and xk6-sse
         
     | 
| 103 | 
         
            +
                    id: k6_installation
         
     | 
| 104 | 
         
            +
                    run: |
         
     | 
| 105 | 
         
            +
                      cd examples/server/bench
         
     | 
| 106 | 
         
            +
                      go install go.k6.io/xk6/cmd/xk6@latest
         
     | 
| 107 | 
         
            +
                      xk6 build master \
         
     | 
| 108 | 
         
            +
                          --with github.com/phymbert/xk6-sse
         
     | 
| 109 | 
         
            +
             
     | 
| 110 | 
         
            +
                  - name: Build
         
     | 
| 111 | 
         
            +
                    id: cmake_build
         
     | 
| 112 | 
         
            +
                    run: |
         
     | 
| 113 | 
         
            +
                      set -eux
         
     | 
| 114 | 
         
            +
                      cmake -B build \
         
     | 
| 115 | 
         
            +
                          -DGGML_NATIVE=OFF \
         
     | 
| 116 | 
         
            +
                          -DLLAMA_BUILD_SERVER=ON \
         
     | 
| 117 | 
         
            +
                          -DLLAMA_CURL=ON \
         
     | 
| 118 | 
         
            +
                          -DLLAMA_CUBLAS=ON \
         
     | 
| 119 | 
         
            +
                          -DCUDAToolkit_ROOT=/usr/local/cuda \
         
     | 
| 120 | 
         
            +
                          -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
         
     | 
| 121 | 
         
            +
                          -DCMAKE_CUDA_ARCHITECTURES=75 \
         
     | 
| 122 | 
         
            +
                          -DLLAMA_FATAL_WARNINGS=OFF \
         
     | 
| 123 | 
         
            +
                          -DLLAMA_ALL_WARNINGS=OFF \
         
     | 
| 124 | 
         
            +
                          -DCMAKE_BUILD_TYPE=Release;
         
     | 
| 125 | 
         
            +
                      cmake --build build --config Release -j $(nproc) --target llama-server
         
     | 
| 126 | 
         
            +
             
     | 
| 127 | 
         
            +
                  - name: Download the dataset
         
     | 
| 128 | 
         
            +
                    id: download_dataset
         
     | 
| 129 | 
         
            +
                    run: |
         
     | 
| 130 | 
         
            +
                      cd examples/server/bench
         
     | 
| 131 | 
         
            +
                      wget --quiet https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
         
     | 
| 132 | 
         
            +
             
     | 
| 133 | 
         
            +
                  - name: Server bench
         
     | 
| 134 | 
         
            +
                    id: server_bench
         
     | 
| 135 | 
         
            +
                    env:
         
     | 
| 136 | 
         
            +
                        HEAD_REF: ${{ github.head_ref || github.ref_name }}
         
     | 
| 137 | 
         
            +
                    run: |
         
     | 
| 138 | 
         
            +
                      set -eux
         
     | 
| 139 | 
         
            +
             
     | 
| 140 | 
         
            +
                      cd examples/server/bench
         
     | 
| 141 | 
         
            +
                      source venv/bin/activate
         
     | 
| 142 | 
         
            +
                      python bench.py \
         
     | 
| 143 | 
         
            +
                          --runner-label ${{ env.RUNNER_LABEL }} \
         
     | 
| 144 | 
         
            +
                          --name ${{ github.job }} \
         
     | 
| 145 | 
         
            +
                          --branch $HEAD_REF \
         
     | 
| 146 | 
         
            +
                          --commit ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha }} \
         
     | 
| 147 | 
         
            +
                          --scenario script.js \
         
     | 
| 148 | 
         
            +
                          --duration ${{ github.event.inputs.duration || env.DURATION }} \
         
     | 
| 149 | 
         
            +
                          --hf-repo ggml-org/models	 \
         
     | 
| 150 | 
         
            +
                          --hf-file ${{ matrix.model }}/ggml-model-${{ matrix.ftype }}.gguf \
         
     | 
| 151 | 
         
            +
                          --model-path-prefix /models \
         
     | 
| 152 | 
         
            +
                          --parallel ${{ env.N_USERS }} \
         
     | 
| 153 | 
         
            +
                          -ngl 33 \
         
     | 
| 154 | 
         
            +
                          --batch-size 2048 \
         
     | 
| 155 | 
         
            +
                          --ubatch-size	256 \
         
     | 
| 156 | 
         
            +
                          --ctx-size 16384 \
         
     | 
| 157 | 
         
            +
                          --n-prompts 1000 \
         
     | 
| 158 | 
         
            +
                          --max-prompt-tokens 1024 \
         
     | 
| 159 | 
         
            +
                          --max-tokens 2048
         
     | 
| 160 | 
         
            +
             
     | 
| 161 | 
         
            +
                      cat results.github.env >> $GITHUB_ENV
         
     | 
| 162 | 
         
            +
             
     | 
| 163 | 
         
            +
                      # Remove dataset as we do not want it in the artefact
         
     | 
| 164 | 
         
            +
                      rm ShareGPT_V3_unfiltered_cleaned_split.json
         
     | 
| 165 | 
         
            +
             
     | 
| 166 | 
         
            +
                  - uses: actions/upload-artifact@v4
         
     | 
| 167 | 
         
            +
                    with:
         
     | 
| 168 | 
         
            +
                      name: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
         
     | 
| 169 | 
         
            +
                      compression-level: 9
         
     | 
| 170 | 
         
            +
                      path: |
         
     | 
| 171 | 
         
            +
                        examples/server/bench/*.jpg
         
     | 
| 172 | 
         
            +
                        examples/server/bench/*.json
         
     | 
| 173 | 
         
            +
                        examples/server/bench/*.log
         
     | 
| 174 | 
         
            +
             
     | 
| 175 | 
         
            +
                  - name: Commit status
         
     | 
| 176 | 
         
            +
                    uses: Sibz/github-status-action@v1
         
     | 
| 177 | 
         
            +
                    with:
         
     | 
| 178 | 
         
            +
                      authToken: ${{secrets.GITHUB_TOKEN}}
         
     | 
| 179 | 
         
            +
                      sha: ${{ inputs.sha || github.event.pull_request.head.sha || github.sha }}
         
     | 
| 180 | 
         
            +
                      context: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
         
     | 
| 181 | 
         
            +
                      description: |
         
     | 
| 182 | 
         
            +
                        ${{ env.BENCH_RESULTS }}
         
     | 
| 183 | 
         
            +
                      state: 'success'
         
     | 
| 184 | 
         
            +
             
     | 
| 185 | 
         
            +
                  - name: Upload benchmark images
         
     | 
| 186 | 
         
            +
                    uses: devicons/public-upload-to-imgur@v2.2.2
         
     | 
| 187 | 
         
            +
                    continue-on-error: true # Important as it looks unstable: 503
         
     | 
| 188 | 
         
            +
                    id: imgur_step
         
     | 
| 189 | 
         
            +
                    with:
         
     | 
| 190 | 
         
            +
                      client_id: ${{secrets.IMGUR_CLIENT_ID}}
         
     | 
| 191 | 
         
            +
                      path: |
         
     | 
| 192 | 
         
            +
                        examples/server/bench/prompt_tokens_seconds.jpg
         
     | 
| 193 | 
         
            +
                        examples/server/bench/predicted_tokens_seconds.jpg
         
     | 
| 194 | 
         
            +
                        examples/server/bench/kv_cache_usage_ratio.jpg
         
     | 
| 195 | 
         
            +
                        examples/server/bench/requests_processing.jpg
         
     | 
| 196 | 
         
            +
             
     | 
| 197 | 
         
            +
                  - name: Extract mermaid
         
     | 
| 198 | 
         
            +
                    id: set_mermaid
         
     | 
| 199 | 
         
            +
                    run: |
         
     | 
| 200 | 
         
            +
                      set -eux
         
     | 
| 201 | 
         
            +
             
     | 
| 202 | 
         
            +
                      cd examples/server/bench
         
     | 
| 203 | 
         
            +
                      PROMPT_TOKENS_SECONDS=$(cat prompt_tokens_seconds.mermaid)
         
     | 
| 204 | 
         
            +
                      echo "PROMPT_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
         
     | 
| 205 | 
         
            +
                      echo "$PROMPT_TOKENS_SECONDS" >> $GITHUB_ENV
         
     | 
| 206 | 
         
            +
                      echo "EOF" >> $GITHUB_ENV
         
     | 
| 207 | 
         
            +
             
     | 
| 208 | 
         
            +
                      PREDICTED_TOKENS_SECONDS=$(cat predicted_tokens_seconds.mermaid)
         
     | 
| 209 | 
         
            +
                      echo "PREDICTED_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
         
     | 
| 210 | 
         
            +
                      echo "$PREDICTED_TOKENS_SECONDS" >> $GITHUB_ENV
         
     | 
| 211 | 
         
            +
                      echo "EOF" >> $GITHUB_ENV
         
     | 
| 212 | 
         
            +
             
     | 
| 213 | 
         
            +
                      KV_CACHE_USAGE_RATIO=$(cat kv_cache_usage_ratio.mermaid)
         
     | 
| 214 | 
         
            +
                      echo "KV_CACHE_USAGE_RATIO<<EOF" >> $GITHUB_ENV
         
     | 
| 215 | 
         
            +
                      echo "$KV_CACHE_USAGE_RATIO" >> $GITHUB_ENV
         
     | 
| 216 | 
         
            +
                      echo "EOF" >> $GITHUB_ENV
         
     | 
| 217 | 
         
            +
             
     | 
| 218 | 
         
            +
                      REQUESTS_PROCESSING=$(cat requests_processing.mermaid)
         
     | 
| 219 | 
         
            +
                      echo "REQUESTS_PROCESSING<<EOF" >> $GITHUB_ENV
         
     | 
| 220 | 
         
            +
                      echo "$REQUESTS_PROCESSING" >> $GITHUB_ENV
         
     | 
| 221 | 
         
            +
                      echo "EOF" >> $GITHUB_ENV
         
     | 
| 222 | 
         
            +
             
     | 
| 223 | 
         
            +
                  - name: Extract image url
         
     | 
| 224 | 
         
            +
                    id: extract_image_url
         
     | 
| 225 | 
         
            +
                    continue-on-error: true
         
     | 
| 226 | 
         
            +
                    run: |
         
     | 
| 227 | 
         
            +
                      set -eux
         
     | 
| 228 | 
         
            +
             
     | 
| 229 | 
         
            +
                      echo "IMAGE_O=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" >> $GITHUB_ENV
         
     | 
| 230 | 
         
            +
                      echo "IMAGE_1=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" >> $GITHUB_ENV
         
     | 
| 231 | 
         
            +
                      echo "IMAGE_2=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" >> $GITHUB_ENV
         
     | 
| 232 | 
         
            +
                      echo "IMAGE_3=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" >> $GITHUB_ENV
         
     | 
| 233 | 
         
            +
             
     | 
| 234 | 
         
            +
      - name: Comment PR
        uses: mshick/add-pr-comment@v2
        id: comment_pr
        if: ${{ github.event.pull_request != '' && matrix.pr_comment_enabled == 'true' }}
        with:
          message-id: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
          message: |
            <p align="center">

            📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_ for `${{ matrix.model }}`-`${{ matrix.ftype }}`: **${{ env.BENCH_ITERATIONS }} iterations** 🚀

            </p>

            <details>

            <summary>Expand details for performance related PR only</summary>

            - Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }}
            - HTTP request          : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(95)=${{ env.HTTP_REQ_DURATION_P_95_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
            - Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_P_95_ }}tk/s
            - Token generation  (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_TOKENS_SECOND_P_95_ }}tk/s
            - ${{ env.BENCH_GRAPH_XLABEL }}

            <p align="center">

            <img width="100%" height="100%" src="${{ env.IMAGE_O }}" alt="prompt_tokens_seconds" />

            <details>

            <summary>More</summary>

            ```mermaid
            ${{ env.PROMPT_TOKENS_SECONDS }}
            ```

            </details>

            <img width="100%" height="100%" src="${{ env.IMAGE_1 }}" alt="predicted_tokens_seconds"/>

            <details>

            <summary>More</summary>

            ```mermaid
            ${{ env.PREDICTED_TOKENS_SECONDS }}
            ```

            </details>

            </p>

            <details>

            <summary>Details</summary>

            <p align="center">

            <img width="100%" height="100%" src="${{ env.IMAGE_2 }}" alt="kv_cache_usage_ratio" />

            <details>

            <summary>More</summary>

            ```mermaid
            ${{ env.KV_CACHE_USAGE_RATIO }}
            ```

            </details>

            <img width="100%" height="100%" src="${{ env.IMAGE_3 }}" alt="requests_processing"/>

            <details>

            <summary>More</summary>

            ```mermaid
            ${{ env.REQUESTS_PROCESSING }}
            ```

            </details>

            </p>

            </details>

            </details>
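The `message-id` passed to mshick/add-pr-comment@v2 above keys the comment: re-runs that produce the same id update the existing benchmark comment instead of stacking a new one per run. A minimal sketch, with an illustrative id string:

```yaml
- uses: mshick/add-pr-comment@v2
  with:
    message-id: bench-server-example   # stable key; re-runs edit this comment in place
    message: |
      updated benchmark numbers go here
```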
    	
llama.cpp/.github/workflows/build.yml
ADDED
@@ -0,0 +1,1416 @@
name: CI

on:
  workflow_dispatch: # allows manual triggering
    inputs:
      create_release:
        description: 'Create new release'
        required: true
        type: boolean
  push:
    branches:
      - master
    paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']
  pull_request:
    types: [opened, synchronize, reopened]
    paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true
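GitHub expressions have no ternary operator; the `a && b || c` idiom in the concurrency group above fills that role (when `a` is truthy the result is `b`, otherwise `c`). Pull requests have a `head_ref`, so their runs share a per-ref group and cancel each other, while pushes fall back to the unique `run_id` and never cancel. The same idiom in a hypothetical setting:

```yaml
# "preview" for pull requests, "production" for everything else
environment: ${{ github.event_name == 'pull_request' && 'preview' || 'production' }}
```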
# Fine-grained permissions
# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
permissions:
  contents: write # for creating release

env:
  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
  GGML_NLOOP: 3
  GGML_N_THREADS: 1
  LLAMA_LOG_COLORS: 1
  LLAMA_LOG_PREFIX: 1
  LLAMA_LOG_TIMESTAMPS: 1

jobs:
  macOS-latest-cmake-arm64:
    runs-on: macos-14

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Dependencies
        id: depends
        continue-on-error: true
        run: |
          brew update

      - name: Build
        id: cmake_build
        run: |
          sysctl -a
          mkdir build
          cd build
          cmake .. \
            -DLLAMA_FATAL_WARNINGS=ON \
            -DLLAMA_CURL=ON \
            -DGGML_METAL_USE_BF16=ON \
            -DGGML_METAL_EMBED_LIBRARY=ON \
            -DGGML_RPC=ON \
            -DBUILD_SHARED_LIBS=OFF
          cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
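The same configure/build pair can be written without the `mkdir`/`cd` dance using CMake's `-B` flag, as the x64 job below already does; a minimal equivalent sketch:

```bash
cmake -B build \
  -DLLAMA_FATAL_WARNINGS=ON \
  -DLLAMA_CURL=ON \
  -DGGML_METAL_USE_BF16=ON \
  -DGGML_METAL_EMBED_LIBRARY=ON \
  -DGGML_RPC=ON \
  -DBUILD_SHARED_LIBS=OFF
cmake --build build --config Release -j "$(sysctl -n hw.logicalcpu)"
```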
      - name: Test
        id: cmake_test
        run: |
          cd build
          ctest -L 'main|curl' --verbose --timeout 900

      - name: Determine tag name
        id: tag
        shell: bash
        run: |
          BUILD_NUMBER="$(git rev-list --count HEAD)"
          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
          if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
            echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
          else
            SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
            echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
          fi
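The tag step above yields `b<count>` on master and `<branch>-b<count>-<hash>` elsewhere: `git rev-list --count HEAD` gives a monotonically increasing build number, and `tr '/' '-'` keeps branch slashes out of file names. A worked example with illustrative values:

```bash
BUILD_NUMBER="$(git rev-list --count HEAD)"         # e.g. 4521
SHORT_HASH="$(git rev-parse --short=7 HEAD)"        # e.g. a1b2c3d
SAFE_NAME=$(echo "fix/rope" | tr '/' '-')           # "fix/rope" -> "fix-rope"
echo "b${BUILD_NUMBER}"                             # master:  b4521
echo "${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}"  # branch:  fix-rope-b4521-a1b2c3d
```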
      - name: Pack artifacts
        id: pack_artifacts
        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
        run: |
          cp LICENSE ./build/bin/
          zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/*

      - name: Upload artifacts
        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
        uses: actions/upload-artifact@v4
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip
          name: llama-bin-macos-arm64.zip

  macOS-latest-cmake-x64:
    runs-on: macos-13

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Dependencies
        id: depends
        continue-on-error: true
        run: |
          brew update

      - name: Build
        id: cmake_build
        run: |
          sysctl -a
          # Metal is disabled due to intermittent failures with Github runners not having a GPU:
          # https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
          cmake -B build \
            -DLLAMA_FATAL_WARNINGS=ON \
            -DLLAMA_CURL=ON \
            -DGGML_METAL=OFF \
            -DGGML_RPC=ON \
            -DBUILD_SHARED_LIBS=OFF
          cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)

      - name: Test
        id: cmake_test
        run: |
          cd build
          ctest -L main --verbose --timeout 900

      - name: Determine tag name
        id: tag
        shell: bash
        run: |
          BUILD_NUMBER="$(git rev-list --count HEAD)"
          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
          if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
            echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
          else
            SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
            echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
          fi

      - name: Pack artifacts
        id: pack_artifacts
        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
        run: |
          cp LICENSE ./build/bin/
          zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/*

      - name: Upload artifacts
        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
        uses: actions/upload-artifact@v4
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip
          name: llama-bin-macos-x64.zip

  ubuntu-latest-cmake:
    runs-on: ubuntu-latest

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Dependencies
        id: depends
        run: |
          sudo apt-get update
          sudo apt-get install build-essential libcurl4-openssl-dev

      - name: Build
        id: cmake_build
        run: |
          mkdir build
          cd build
          cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF
          cmake --build . --config Release -j $(nproc)

      - name: Test
        id: cmake_test
        run: |
          cd build
          ctest -L 'main|curl' --verbose --timeout 900

      - name: Test llama2c conversion
        id: llama2c_test
        run: |
          cd build
          echo "Fetch tokenizer"
          wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin
          echo "Fetch llama2c model"
          wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin
          ./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf
          ./bin/llama-cli -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256

      - name: Determine tag name
        id: tag
        shell: bash
        run: |
          BUILD_NUMBER="$(git rev-list --count HEAD)"
          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
          if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
            echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
          else
            SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
            echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
          fi

      - name: Pack artifacts
        id: pack_artifacts
        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
        run: |
          cp LICENSE ./build/bin/
          zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip ./build/bin/*

      - name: Upload artifacts
        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
        uses: actions/upload-artifact@v4
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip
          name: llama-bin-ubuntu-x64.zip

  ubuntu-latest-cmake-sanitizer:
    runs-on: ubuntu-latest

    continue-on-error: true

    strategy:
      matrix:
        sanitizer: [ADDRESS, THREAD, UNDEFINED]
        build_type: [Debug, Release]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Dependencies
        id: depends
        run: |
          sudo apt-get update
          sudo apt-get install build-essential

      - name: Build
        id: cmake_build
        if: ${{ matrix.sanitizer != 'THREAD' }}
        run: |
          mkdir build
          cd build
          cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
          cmake --build . --config ${{ matrix.build_type }} -j $(nproc)

      - name: Build (no OpenMP)
        id: cmake_build_no_openmp
        if: ${{ matrix.sanitizer == 'THREAD' }}
        run: |
          mkdir build
          cd build
          cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DGGML_OPENMP=OFF
          cmake --build . --config ${{ matrix.build_type }} -j $(nproc)
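For one matrix cell the templated build expands as below; `GGML_OPENMP=OFF` is applied only for the THREAD sanitizer, presumably because the OpenMP runtime is noisy under ThreadSanitizer (an assumption — the workflow itself does not state the reason). Sketch for `sanitizer=THREAD`, `build_type=Debug`:

```bash
cmake .. \
  -DLLAMA_FATAL_WARNINGS=ON \
  -DLLAMA_SANITIZE_THREAD=ON \
  -DCMAKE_BUILD_TYPE=Debug \
  -DGGML_OPENMP=OFF          # extra flag for the THREAD cell only
cmake --build . --config Debug -j "$(nproc)"
```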
                  - name: Test
         
     | 
| 271 | 
         
            +
                    id: cmake_test
         
     | 
| 272 | 
         
            +
                    run: |
         
     | 
| 273 | 
         
            +
                      cd build
         
     | 
| 274 | 
         
            +
                      ctest -L main --verbose --timeout 900
         
     | 
| 275 | 
         
            +
             
     | 
| 276 | 
         
            +
  ubuntu-latest-cmake-rpc:
    runs-on: ubuntu-latest

    continue-on-error: true

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Dependencies
        id: depends
        run: |
          sudo apt-get update
          sudo apt-get install build-essential

      - name: Build
        id: cmake_build
        run: |
          mkdir build
          cd build
          cmake -DGGML_RPC=ON ..
          cmake --build . --config Release -j $(nproc)

      - name: Test
        id: cmake_test
        run: |
          cd build
          ctest -L main --verbose
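
  # Vulkan build on 22.04 against the LunarG SDK (jammy repo). Build-only:
  # there is no ctest step, presumably because hosted runners expose no
  # usable Vulkan device.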
  ubuntu-22-cmake-vulkan:
    runs-on: ubuntu-22.04

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Dependencies
        id: depends
        run: |
          wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
          sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
          sudo apt-get update -y
          sudo apt-get install -y build-essential vulkan-sdk

      - name: Build
        id: cmake_build
        run: |
          mkdir build
          cd build
          cmake -DGGML_VULKAN=ON ..
          cmake --build . --config Release -j $(nproc)
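
  # HIP build inside the rocm/dev-ubuntu-22.04 container, exercised both via
  # native CMake HIP support (CMAKE_HIP_COMPILER) and via the legacy route of
  # setting hipcc as the C/C++ compiler.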
  ubuntu-22-cmake-hip:
    runs-on: ubuntu-22.04
    container: rocm/dev-ubuntu-22.04:6.0.2

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Dependencies
        id: depends
        run: |
          sudo apt-get update
          sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev

      - name: Build with native CMake HIP support
        id: cmake_build
        run: |
          cmake -B build -S . -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" -DGGML_HIP=ON
          cmake --build build --config Release -j $(nproc)

      - name: Build with legacy HIP support
        id: cmake_build_legacy_hip
        run: |
          cmake -B build2 -S . -DCMAKE_C_COMPILER=hipcc -DCMAKE_CXX_COMPILER=hipcc -DGGML_HIP=ON
          cmake --build build2 --config Release -j $(nproc)
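
  # MUSA (Moore Threads GPU) build inside the vendor's devel container; build-only.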
  ubuntu-22-cmake-musa:
    runs-on: ubuntu-22.04
    container: mthreads/musa:rc3.1.0-devel-ubuntu22.04

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Dependencies
        id: depends
        run: |
          apt-get update
          apt-get install -y build-essential git cmake libcurl4-openssl-dev

      - name: Build with native CMake MUSA support
        id: cmake_build
        run: |
          cmake -B build -S . -DGGML_MUSA=ON
          cmake --build build --config Release -j $(nproc)
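
  # SYCL build with Intel oneAPI: register the oneAPI apt repo, install the DPC++
  # compiler and MKL, then configure with icx/icpx. Note that setvars.sh has to be
  # sourced in the same shell that runs cmake so the toolchain is picked up.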
  ubuntu-22-cmake-sycl:
    runs-on: ubuntu-22.04

    continue-on-error: true

    steps:
      - uses: actions/checkout@v4

      - name: add oneAPI to apt
        shell: bash
        run: |
          cd /tmp
          wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
          sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
          rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
          sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"

      - name: install oneAPI dpcpp compiler
        shell: bash
        run: |
          sudo apt update
          sudo apt install intel-oneapi-compiler-dpcpp-cpp

      - name: install oneAPI MKL library
        shell: bash
        run: |
          sudo apt install intel-oneapi-mkl-devel

      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Build
        id: cmake_build
        run: |
          source /opt/intel/oneapi/setvars.sh
          mkdir build
          cd build
          cmake -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
          cmake --build . --config Release -j $(nproc)
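
  # Identical to ubuntu-22-cmake-sycl, plus GGML_SYCL_F16=ON to cover the FP16 path.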
  ubuntu-22-cmake-sycl-fp16:
    runs-on: ubuntu-22.04

    continue-on-error: true

    steps:
      - uses: actions/checkout@v4

      - name: add oneAPI to apt
        shell: bash
        run: |
          cd /tmp
          wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
          sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
          rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
          sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"

      - name: install oneAPI dpcpp compiler
        shell: bash
        run: |
          sudo apt update
          sudo apt install intel-oneapi-compiler-dpcpp-cpp

      - name: install oneAPI MKL library
        shell: bash
        run: |
          sudo apt install intel-oneapi-mkl-devel

      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Build
        id: cmake_build
        run: |
          source /opt/intel/oneapi/setvars.sh
          mkdir build
          cd build
          cmake -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON ..
          cmake --build . --config Release -j $(nproc)
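
  # CPU-only macOS build and test; the upstream TODO below explains why Metal
  # is turned off for this job.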
  # TODO: build with GGML_METAL=OFF because test-backend-ops fail on "Apple Paravirtual device" and I don't know
  #       how to debug it.
  #       ref: https://github.com/ggerganov/llama.cpp/actions/runs/7132125951/job/19422043567?pr=4359#step:5:6584
  #       would be great if we fix these
  macOS-latest-cmake:
    runs-on: macos-latest

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Dependencies
        id: depends
        continue-on-error: true
        run: |
          brew update

      - name: Build
        id: cmake_build
        run: |
          sysctl -a
          mkdir build
          cd build
          cmake -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL=OFF ..
          cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)

      - name: Test
        id: cmake_test
        run: |
          cd build
          ctest -L main --verbose --timeout 900
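
  # iOS cross-build via the Xcode generator. CODE_SIGNING_ALLOWED=NO lets the
  # build pass without a signing identity; examples, tests and the server are
  # switched off for the embedded target.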
  macOS-latest-cmake-ios:
    runs-on: macos-latest

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Dependencies
        id: depends
        continue-on-error: true
        run: |
          brew update

      - name: Build
        id: cmake_build
        run: |
          sysctl -a
          mkdir build
          cd build
          cmake -G Xcode .. \
            -DGGML_METAL_USE_BF16=ON \
            -DGGML_METAL_EMBED_LIBRARY=ON \
            -DLLAMA_BUILD_EXAMPLES=OFF \
            -DLLAMA_BUILD_TESTS=OFF \
            -DLLAMA_BUILD_SERVER=OFF \
            -DCMAKE_SYSTEM_NAME=iOS \
            -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
            -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
          cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
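
  # Same cross-build flow as the iOS job above, retargeted at tvOS.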
  macOS-latest-cmake-tvos:
    runs-on: macos-latest

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Dependencies
        id: depends
        continue-on-error: true
        run: |
          brew update

      - name: Build
        id: cmake_build
        run: |
          sysctl -a
          mkdir build
          cd build
          cmake -G Xcode .. \
            -DGGML_METAL_USE_BF16=ON \
            -DGGML_METAL_EMBED_LIBRARY=ON \
            -DLLAMA_BUILD_EXAMPLES=OFF \
            -DLLAMA_BUILD_TESTS=OFF \
            -DLLAMA_BUILD_SERVER=OFF \
            -DCMAKE_SYSTEM_NAME=tvOS \
            -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
            -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
          cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
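
# The commented-out job below is the Swift package build. Once re-enabled, an
# invocation like `xcodebuild -scheme llama -destination 'generic/platform=macOS'`
# (taken from the job itself) should reproduce it locally, assuming Xcode is set up.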
# TODO: tmp disabled. see for possible re-enable:
#       https://github.com/ggerganov/llama.cpp/pull/10525
#  macOS-latest-swift:
#    runs-on: macos-latest
#
#    strategy:
#      matrix:
#        destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS']
#
#    steps:
#      - name: Clone
#        id: checkout
#        uses: actions/checkout@v4
#
#      - name: Dependencies
#        id: depends
#        continue-on-error: true
#        run: |
#          brew update
#
#      - name: xcodebuild for swift package
#        id: xcodebuild
#        run: |
#          xcodebuild -scheme llama -destination "${{ matrix.destination }}"
#
#      - name: Build Swift Example
#        id: make_build_swift_example
#        run: |
#            make swift
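
  # MSYS2 builds on Windows across the UCRT64 and CLANG64 environments: first a
  # plain CMake build, then (after wiping the tree) a second one with OpenBLAS.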
  windows-msys2:
    runs-on: windows-latest

    strategy:
      fail-fast: false
      matrix:
        include:
          - { sys: UCRT64,  env: ucrt-x86_64,  build: Release }
          - { sys: CLANG64, env: clang-x86_64, build: Release }

    steps:
      - name: Clone
        uses: actions/checkout@v4

      - name: Setup ${{ matrix.sys }}
        uses: msys2/setup-msys2@v2
        with:
          update: true
          msystem: ${{matrix.sys}}
          install: >-
            base-devel
            mingw-w64-${{matrix.env}}-toolchain
            mingw-w64-${{matrix.env}}-cmake
            mingw-w64-${{matrix.env}}-openblas

      - name: Build using CMake
        shell: msys2 {0}
        run: |
            cmake -B build
            cmake --build build --config ${{ matrix.build }} -j $(nproc)

      - name: Clean after building using CMake
        shell: msys2 {0}
        run: |
            rm -rf build

      - name: Build using CMake w/ OpenBLAS
        shell: msys2 {0}
        run: |
            cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
            cmake --build build --config ${{ matrix.build }} -j $(nproc)
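
  # Main Windows release matrix: one build per ISA/backend variant (noavx, avx,
  # avx2, avx512, OpenBLAS, Kompute, Vulkan, plus llvm/msvc arm64 toolchains),
  # each passing its own CMake flags through matrix.defines.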
  windows-latest-cmake:
    runs-on: windows-latest

    env:
      OPENBLAS_VERSION: 0.3.23
      SDE_VERSION: 9.33.0-2024-01-07
      VULKAN_VERSION: 1.3.261.1

    strategy:
      matrix:
        include:
          - build: 'noavx-x64'
            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DBUILD_SHARED_LIBS=ON'
          - build: 'avx2-x64'
            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=ON'
          - build: 'avx-x64'
            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
          - build: 'avx512-x64'
            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX512=ON -DBUILD_SHARED_LIBS=ON'
          - build: 'openblas-x64'
            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BLAS=ON -DBUILD_SHARED_LIBS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
          - build: 'kompute-x64'
            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DBUILD_SHARED_LIBS=ON'
          - build: 'vulkan-x64'
            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_VULKAN=ON -DBUILD_SHARED_LIBS=ON'
          - build: 'llvm-arm64'
            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
          - build: 'msvc-arm64'
            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
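
    # Shared step flow for every matrix entry: check out, fetch the per-variant
    # dependencies, build with matrix.defines, test (natively, or under Intel SDE
    # for AVX-512 when the host lacks it), then pack and upload release zips.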
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Clone Kompute submodule
        id: clone_kompute
        if: ${{ matrix.build == 'kompute-x64' }}
        run: |
          git submodule update --init ggml/src/ggml-kompute/kompute

      - name: Download OpenBLAS
        id: get_openblas
        if: ${{ matrix.build == 'openblas-x64' }}
        run: |
          curl.exe -o $env:RUNNER_TEMP/openblas.zip -L "https://github.com/xianyi/OpenBLAS/releases/download/v${env:OPENBLAS_VERSION}/OpenBLAS-${env:OPENBLAS_VERSION}-x64.zip"
          curl.exe -o $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt -L "https://github.com/xianyi/OpenBLAS/raw/v${env:OPENBLAS_VERSION}/LICENSE"
          mkdir $env:RUNNER_TEMP/openblas
          tar.exe -xvf $env:RUNNER_TEMP/openblas.zip -C $env:RUNNER_TEMP/openblas
          $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
          $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
          $lib =  $(join-path $msvc 'bin\Hostx64\x64\lib.exe')
          & $lib /machine:x64 "/def:${env:RUNNER_TEMP}/openblas/lib/libopenblas.def" "/out:${env:RUNNER_TEMP}/openblas/lib/openblas.lib" /name:openblas.dll

      - name: Install Vulkan SDK
        id: get_vulkan
        if: ${{ matrix.build == 'kompute-x64' || matrix.build == 'vulkan-x64' }}
        run: |
          curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe"
          & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
          Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
          Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"

      - name: Install Ninja
        id: install_ninja
        run: |
          choco install ninja

      - name: Build
        id: cmake_build
        run: |
          cmake -S . -B build ${{ matrix.defines }}
          cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS}

      - name: Add libopenblas.dll
        id: add_libopenblas_dll
        if: ${{ matrix.build == 'openblas-x64' }}
        run: |
          cp $env:RUNNER_TEMP/openblas/bin/libopenblas.dll ./build/bin/Release/openblas.dll
          cp $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt ./build/bin/Release/OpenBLAS-${env:OPENBLAS_VERSION}.txt

      - name: Check AVX512F support
        id: check_avx512f
        if: ${{ matrix.build == 'avx512-x64' }}
        continue-on-error: true
        run: |
          cd build
          $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
          $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
          $cl =  $(join-path $msvc 'bin\Hostx64\x64\cl.exe')
          echo 'int main(void){unsigned int a[4];__cpuid(a,7);return !(a[1]&65536);}' >> avx512f.c
          & $cl /O2 /GS- /kernel avx512f.c /link /nodefaultlib /entry:main
          .\avx512f.exe && echo "AVX512F: YES" && ( echo HAS_AVX512F=1 >> $env:GITHUB_ENV ) || echo "AVX512F: NO"

      - name: Test
        id: cmake_test
        # not all machines have native AVX-512
        if: ${{ matrix.build != 'msvc-arm64' && matrix.build != 'llvm-arm64' && matrix.build != 'kompute-x64' && matrix.build != 'vulkan-x64' && (matrix.build != 'avx512-x64' || env.HAS_AVX512F == '1') }}
        run: |
          cd build
          ctest -L main -C Release --verbose --timeout 900

      - name: Test (Intel SDE)
        id: cmake_test_sde
        if: ${{ matrix.build == 'avx512-x64' && env.HAS_AVX512F == '0' }} # use Intel SDE for AVX-512 emulation
        run: |
          curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/813591/sde-external-${env:SDE_VERSION}-win.tar.xz"
          # for some weird reason windows tar doesn't like sde tar.xz
          7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar.xz
          7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar
          $sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe)
          cd build
          $env:LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR = 1
          & $sde -future -- ctest -L main -C Release --verbose --timeout 900

      - name: Determine tag name
        id: tag
        shell: bash
        run: |
          BUILD_NUMBER="$(git rev-list --count HEAD)"
          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
          if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
            echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
          else
            SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
            echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
          fi

      - name: Pack artifacts
        id: pack_artifacts
        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
        run: |
          Copy-Item LICENSE .\build\bin\Release\llama.cpp.txt
          7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip .\build\bin\Release\*

      - name: Upload artifacts
        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
        uses: actions/upload-artifact@v4
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip
          name: llama-bin-win-${{ matrix.build }}.zip
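
  # CUDA build-only job in the nvidia/cuda devel container. The pinned
  # CMAKE_CUDA_ARCHITECTURES plus GGML_NATIVE=OFF keep the build reproducible,
  # and --allow-shlib-undefined likely exists so linking succeeds on a runner
  # without the NVIDIA driver libraries.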
  ubuntu-latest-cmake-cuda:
    runs-on: ubuntu-latest
    container: nvidia/cuda:12.6.2-devel-ubuntu24.04

    steps:
        - name: Clone
          id: checkout
          uses: actions/checkout@v4

        - name: Install dependencies
          env:
            DEBIAN_FRONTEND: noninteractive
          run: |
              apt update
              apt install -y cmake build-essential ninja-build libgomp1 git

        - name: Build with CMake
          run: |
            cmake -S . -B build -G Ninja -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=89-real -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined -DLLAMA_FATAL_WARNINGS=ON
            cmake --build build
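
  # Windows CUDA builds for 11.7 and 12.4. Instead of the full installer, the
  # needed toolkit components are downloaded as individual redist archives and
  # merged into the standard CUDA_PATH layout with xcopy.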
              windows-2019-cmake-cuda:
         
     | 
| 793 | 
         
            +
                runs-on: windows-2019
         
     | 
| 794 | 
         
            +
             
     | 
| 795 | 
         
            +
                strategy:
         
     | 
| 796 | 
         
            +
                  matrix:
         
     | 
| 797 | 
         
            +
                    cuda: ['12.4', '11.7']
         
     | 
| 798 | 
         
            +
                    build: ['cuda']
         
     | 
| 799 | 
         
            +
             
     | 
| 800 | 
         
            +
                steps:
         
     | 
| 801 | 
         
            +
                  - name: Clone
         
     | 
| 802 | 
         
            +
                    id: checkout
         
     | 
| 803 | 
         
            +
                    uses: actions/checkout@v4
         
     | 
| 804 | 
         
            +
                    with:
         
     | 
| 805 | 
         
            +
                        fetch-depth: 0
         
     | 
| 806 | 
         
            +
             
     | 
| 807 | 
         
            +
                  - name: Install Cuda Toolkit 11.7
         
     | 
| 808 | 
         
            +
                    if: ${{ matrix.cuda == '11.7' }}
         
     | 
| 809 | 
         
            +
                    run: |
         
     | 
| 810 | 
         
            +
                      mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
         
     | 
| 811 | 
         
            +
                      choco install unzip -y
         
     | 
| 812 | 
         
            +
                      curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-11.7.99-archive.zip"
         
     | 
| 813 | 
         
            +
                      curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-11.7.99-archive.zip"
         
     | 
| 814 | 
         
            +
                      curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-11.7.99-archive.zip"
         
     | 
| 815 | 
         
            +
                      curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-11.7.4.6-archive.zip"
         
     | 
| 816 | 
         
            +
                      curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-11.7.91-archive.zip"
         
     | 
| 817 | 
         
            +
                      curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-11.7.91-archive.zip"
         
     | 
| 818 | 
         
            +
                      curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-11.7.101-archive.zip"
         
     | 
| 819 | 
         
            +
                      curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-11.7.91-archive.zip"
         
     | 
| 820 | 
         
            +
                      unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
         
     | 
| 821 | 
         
            +
                      xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cudart-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
         
     | 
| 822 | 
         
            +
                      xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvcc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
         
     | 
| 823 | 
         
            +
                      xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvrtc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
         
     | 
| 824 | 
         
            +
                      xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libcublas-windows-x86_64-11.7.4.6-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
         
     | 
| 825 | 
         
            +
                      xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvtx-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
         
     | 
| 826 | 
         
            +
                      xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\visual_studio_integration-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
         
     | 
| 827 | 
         
            +
                      xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvprof-windows-x86_64-11.7.101-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
         
     | 
| 828 | 
         
            +
                      xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cccl-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
         
     | 
| 829 | 
         
            +
                      echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
         
     | 
| 830 | 
         
            +
                      echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
         
     | 
| 831 | 
         
            +
                      echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
         
     | 
| 832 | 
         
            +
                      echo "CUDA_PATH_V11_7=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
         
+
+      - name: Install CUDA Toolkit 12.4
+        if: ${{ matrix.cuda == '12.4' }}
+        run: |
+          mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
+          choco install unzip -y
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-12.4.127-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-12.4.131-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-12.4.127-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-12.4.5.8-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-12.4.127-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-12.4.127-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-12.4.127-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-12.4.127-archive.zip"
+          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-12.4.127-archive.zip"
+          unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cudart-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvcc-windows-x86_64-12.4.131-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvrtc-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libcublas-windows-x86_64-12.4.5.8-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvtx-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_profiler_api-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\visual_studio_integration-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvprof-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cccl-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
+          echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+          echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+          echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
+          echo "CUDA_PATH_V12_4=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
         
+
+      - name: Install ccache
+        uses: hendrikmuhs/ccache-action@v1.2
+        with:
+          key: ${{ github.job }}-${{ matrix.cuda }}-${{ matrix.build }}
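+          # A distinct cache key per job / CUDA version / build type keeps matrix entries from clobbering each other's compiler caches.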
         
+
+      - name: Install Ninja
+        id: install_ninja
+        run: |
+          choco install ninja
+
+      - name: Build
+        id: cmake_build
+        shell: cmd
+        run: |
+          call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
+          cmake -S . -B build -G "Ninja Multi-Config" -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=ON -DGGML_RPC=ON
+          set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
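+          REM Build the ggml target first with one job fewer than the core count; the parallel nvcc
+          REM invocations are memory-hungry and can otherwise exhaust the runner if everything builds at once.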
         
+          cmake --build build --config Release -j %NINJA_JOBS% -t ggml
+          cmake --build build --config Release
+
+      - name: Determine tag name
+        id: tag
+        shell: bash
+        run: |
+          BUILD_NUMBER="$(git rev-list --count HEAD)"
+          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
+          if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
+            echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
+          else
+            SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
+            echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
+          fi
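+          # Yields e.g. "b4289" on master, or "my-branch-b4289-1a2b3c4" for other branches.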
         
+
+      - name: Pack artifacts
+        id: pack_artifacts
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        run: |
+          7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip .\build\bin\Release\*
+
+      - name: Upload artifacts
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        uses: actions/upload-artifact@v4
+        with:
+          path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip
+          name: llama-bin-win-cu${{ matrix.cuda }}-x64.zip
+
+      - name: Copy and pack CUDA runtime
+        if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
+        run: |
+          echo "CUDA install location: ${{ env.CUDA_PATH }}"
+          $dst='.\build\bin\cudart\'
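+          # Copy only the cudart / cuBLAS runtime DLLs that the release binaries need at run time.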
         
+          robocopy "${{env.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
+          robocopy "${{env.CUDA_PATH}}\lib" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
+          7z a cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip $dst\*
+
+      - name: Upload CUDA runtime
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        uses: actions/upload-artifact@v4
+        with:
+          path: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
+          name: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
         
+
+  windows-latest-cmake-sycl:
+    runs-on: windows-latest
+
+    defaults:
+      run:
+        shell: bash
+
+    env:
+      WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b380d914-366b-4b77-a74a-05e3c38b3514/intel-oneapi-base-toolkit-2025.0.0.882_offline.exe
+      WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel
+      ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Install
+        run: |
+          scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL
+
+      - name: Build
+        id: cmake_build
+        run: examples/sycl/win-build-sycl.bat
+
+      - name: Determine tag name
+        id: tag
+        shell: bash
+        run: |
+          BUILD_NUMBER="$(git rev-list --count HEAD)"
+          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
+          if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
+            echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
+          else
+            SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
+            echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
+          fi
+
+      - name: Build the release package
+        id: pack_artifacts
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        run: |
         
+          echo "Copying oneAPI runtime DLLs from ${{ env.ONEAPI_ROOT }} to ./build/bin"
+
+          cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_sycl_blas.5.dll" ./build/bin
+          cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_core.2.dll" ./build/bin
+          cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_tbb_thread.2.dll" ./build/bin
+
+          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_level_zero.dll" ./build/bin
+          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_opencl.dll" ./build/bin
+          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_loader.dll" ./build/bin
+          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_win_proxy_loader.dll" ./build/bin
+
+          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl8.dll" ./build/bin
+          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/svml_dispmd.dll" ./build/bin
+          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libmmd.dll" ./build/bin
+          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libiomp5md.dll" ./build/bin
+
+          cp "${{ env.ONEAPI_ROOT }}/dnnl/latest/bin/dnnl.dll" ./build/bin
+          cp "${{ env.ONEAPI_ROOT }}/tbb/latest/bin/tbb12.dll" ./build/bin
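+          # With the MKL, compiler-runtime, oneDNN and TBB DLLs bundled, the zip should run without a local oneAPI install.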
         
+
+          echo "oneAPI runtime DLLs copied to ./build/bin"
+          7z a llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip ./build/bin/*
         
+
+      - name: Upload the release package
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        uses: actions/upload-artifact@v4
+        with:
+          path: llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip
+          name: llama-bin-win-sycl-x64.zip
+
+  windows-latest-cmake-hip:
+    if: ${{ github.event.inputs.create_release != 'true' }}
+    runs-on: windows-latest
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Install
+        id: depends
+        run: |
+          $ErrorActionPreference = "Stop"
+          write-host "Downloading AMD HIP SDK Installer"
+          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
+          write-host "Installing AMD HIP SDK"
+          Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
+          write-host "Completed AMD HIP SDK installation"
+
+      - name: Verify ROCm
+        id: verify
+        run: |
+          & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
+
+      - name: Install ccache
+        uses: hendrikmuhs/ccache-action@v1.2
+        with:
+          key: ${{ github.job }}
+
+      - name: Build
+        id: cmake_build
+        run: |
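+          # Resolve the versioned ROCm folder (C:\Program Files\AMD\ROCm\<x.y>) by locating clang.exe and walking up two directories.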
         
+          $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
+          $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
+          cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIP=ON -DCMAKE_BUILD_TYPE=Release -DGGML_RPC=ON
+          cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
+
+  windows-latest-cmake-hip-release:
+    if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+    runs-on: windows-latest
+
+    strategy:
+      matrix:
+        gpu_target: [gfx1100, gfx1101, gfx1030]
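+        # gfx1100/gfx1101 cover RDNA3 parts (e.g. RX 7900 / RX 7800 class); gfx1030 covers RDNA2 (e.g. RX 6800/6900 XT).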
         
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Install
+        id: depends
+        run: |
+          $ErrorActionPreference = "Stop"
+          write-host "Downloading AMD HIP SDK Installer"
+          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
+          write-host "Installing AMD HIP SDK"
+          Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
+          write-host "Completed AMD HIP SDK installation"
+
+      - name: Verify ROCm
+        id: verify
+        run: |
+          & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
+
+      - name: Build
+        id: cmake_build
+        run: |
+          $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
+          $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
+          cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIP=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=${{ matrix.gpu_target }} -DGGML_RPC=ON
+          cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
+          md "build\bin\rocblas\library\"
+          cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\"
+          cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\"
+          cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\"
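+          # rocBLAS expects its Tensile kernel files in a rocblas\library folder next to the DLL, so ship them alongside the binaries.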
         
+
+      - name: Determine tag name
+        id: tag
+        shell: bash
+        run: |
+          BUILD_NUMBER="$(git rev-list --count HEAD)"
+          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
+          if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
+            echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
+          else
+            SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
+            echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
+          fi
+
+      - name: Pack artifacts
+        id: pack_artifacts
+        run: |
+          7z a llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip .\build\bin\*
+
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          path: llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip
+          name: llama-bin-win-hip-x64-${{ matrix.gpu_target }}.zip
+
+  ios-xcode-build:
+    runs-on: macos-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Build Xcode project
+        run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build
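+        # CODE_SIGNING_REQUIRED=NO and an empty CODE_SIGN_IDENTITY let CI build the iOS target without signing certificates.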
         
+
+  android-build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Clone
+        uses: actions/checkout@v4
+
+      - name: Set up JDK
+        uses: actions/setup-java@v3
+        with:
+          java-version: 17
+          distribution: zulu
+
+      - name: Setup Android SDK
+        uses: android-actions/setup-android@v3
+        with:
+          log-accepted-android-sdk-licenses: false
+
+      - name: Build
+        run: |
+          cd examples/llama.android
+
+          ./gradlew build --no-daemon
+
+#  freeBSD-latest:
+#    runs-on: macos-12
+#    steps:
+#    - name: Clone
+#      uses: actions/checkout@v4
+#
+#    - name: Build
+#      uses: cross-platform-actions/action@v0.19.0
+#      with:
+#        operating_system: freebsd
+#        version: '13.2'
+#        hypervisor: 'qemu'
+#        run: |
+#            sudo pkg update
+#            sudo pkg install -y gmake automake autoconf pkgconf llvm15 openblas
+#            gmake CC=/usr/local/bin/clang15 CXX=/usr/local/bin/clang++15 -j `sysctl -n hw.ncpu`
+
+  release:
+    if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+
+    runs-on: ubuntu-latest
+
+    needs:
+      - ubuntu-latest-cmake
+      - macOS-latest-cmake
+      - windows-latest-cmake
+      - windows-2019-cmake-cuda
+      - windows-latest-cmake-hip-release
+      - macOS-latest-cmake-arm64
+      - macOS-latest-cmake-x64
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Determine tag name
+        id: tag
+        shell: bash
+        run: |
+          BUILD_NUMBER="$(git rev-list --count HEAD)"
+          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
+          if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
+            echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
+          else
+            SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
+            echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
+          fi
+
+      - name: Download artifacts
+        id: download-artifact
+        uses: actions/download-artifact@v4
+        with:
+          path: ./artifact
+
+      - name: Move artifacts
+        id: move_artifacts
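+        # download-artifact@v4 unpacks each artifact into its own subdirectory; gather every zip into ./artifact/release.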
         
+        run: mkdir -p ./artifact/release && mv ./artifact/*/*.zip ./artifact/release
+
+      - name: Create release
+        id: create_release
+        uses: anzz1/action-create-release@v1
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          tag_name: ${{ steps.tag.outputs.name }}
+
+      - name: Upload release
+        id: upload_release
+        uses: actions/github-script@v3
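+        # github-script@v3 exposes an authenticated Octokit client as `github`, with REST calls on github.repos.*
+        # (newer releases of the action moved them under github.rest.*).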
         
+        with:
+          github-token: ${{secrets.GITHUB_TOKEN}}
+          script: |
+            const path = require('path');
+            const fs = require('fs');
+            const release_id = '${{ steps.create_release.outputs.id }}';
+            for (let file of await fs.readdirSync('./artifact/release')) {
+              if (path.extname(file) === '.zip') {
+                console.log('uploadReleaseAsset', file);
+                await github.repos.uploadReleaseAsset({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  release_id: release_id,
+                  name: file,
+                  data: await fs.readFileSync(`./artifact/release/${file}`)
+                });
+              }
+            }
+
+#  ubuntu-latest-gcc:
+#    runs-on: ubuntu-latest
+#
+#    strategy:
+#      matrix:
+#        build: [Debug, Release]
+#
+#    steps:
+#      - name: Clone
+#        uses: actions/checkout@v4
+#
+#      - name: Dependencies
+#        run: |
+#          sudo apt-get update
+#          sudo apt-get install build-essential
+#          sudo apt-get install cmake
+#
+#      - name: Configure
+#        run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
+#
+#      - name: Build
+#        run: |
+#          make
+#
+#  ubuntu-latest-clang:
+#    runs-on: ubuntu-latest
+#
+#    strategy:
+#      matrix:
+#        build: [Debug, Release]
+#
+#    steps:
+#      - name: Clone
+#        uses: actions/checkout@v4
+#
+#      - name: Dependencies
+#        run: |
+#          sudo apt-get update
+#          sudo apt-get install build-essential
+#          sudo apt-get install cmake
+#
+#      - name: Configure
+#        run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
+#
+#      - name: Build
+#        run: |
+#          make
+#
+#  ubuntu-latest-gcc-sanitized:
+#    runs-on: ubuntu-latest
+#
+#    strategy:
+#      matrix:
+#        sanitizer: [ADDRESS, THREAD, UNDEFINED]
+#
+#    steps:
+#      - name: Clone
+#        uses: actions/checkout@v4
+#
+#      - name: Dependencies
+#        run: |
+#          sudo apt-get update
+#          sudo apt-get install build-essential
+#          sudo apt-get install cmake
+#
+#      - name: Configure
+#        run: cmake . -DCMAKE_BUILD_TYPE=Debug -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON
+#
+#      - name: Build
+#        run: |
+#          make
+#
+#  windows:
+#    runs-on: windows-latest
+#
+#    strategy:
+#      matrix:
+#        build: [Release]
+#        arch: [Win32, x64]
+#        include:
+#          - arch: Win32
+#            s2arc: x86
+#          - arch: x64
+#            s2arc: x64
+#
+#    steps:
+#      - name: Clone
+#        uses: actions/checkout@v4
+#
+#      - name: Add msbuild to PATH
+#        uses: microsoft/setup-msbuild@v1
+#
+#      - name: Configure
+#        run: >
+#          cmake -S . -B ./build -A ${{ matrix.arch }}
+#          -DCMAKE_BUILD_TYPE=${{ matrix.build }}
+#
+#      - name: Build
+#        run: |
+#          cd ./build
+#          msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
+#
+#      - name: Upload binaries
+#        uses: actions/upload-artifact@v4
+#        with:
+#          name: llama-bin-${{ matrix.arch }}
+#          path: build/bin/${{ matrix.build }}
+#
+#  windows-blas:
+#    runs-on: windows-latest
+#
+#    strategy:
+#      matrix:
+#        build: [Release]
+#        arch: [Win32, x64]
+#        blas: [ON]
+#        include:
+#          - arch: Win32
+#            obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x86.zip
+#            s2arc: x86
+#          - arch: x64
+#            obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x64.zip
+#            s2arc: x64
+#
+#    steps:
+#      - name: Clone
+#        uses: actions/checkout@v4
+#
+#      - name: Add msbuild to PATH
+#        uses: microsoft/setup-msbuild@v1
+#
+#      - name: Fetch OpenBLAS
+#        if: matrix.blas == 'ON'
+#        run: |
+#          C:/msys64/usr/bin/wget.exe -qO blas.zip ${{ matrix.obzip }}
+#          7z x blas.zip -oblas -y
+#          copy blas/include/cblas.h .
+#          copy blas/include/openblas_config.h .
+#          echo "blasdir=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV
+#
+#      - name: Configure
+#        run: >
+#          cmake -S . -B ./build -A ${{ matrix.arch }}
+#          -DCMAKE_BUILD_TYPE=${{ matrix.build }}
+#          -DLLAMA_SUPPORT_OPENBLAS=${{ matrix.blas }}
+#          -DCMAKE_LIBRARY_PATH="$env:blasdir/lib"
+#
+#      - name: Build
+#        run: |
+#          cd ./build
+#          msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
+#
+#      - name: Copy libopenblas.dll
+#        if: matrix.blas == 'ON'
+#        run: copy "$env:blasdir/bin/libopenblas.dll" build/bin/${{ matrix.build }}
+#
+#      - name: Upload binaries
+#        if: matrix.blas == 'ON'
+#        uses: actions/upload-artifact@v4
+#        with:
+#          name: llama-blas-bin-${{ matrix.arch }}
+#          path: build/bin/${{ matrix.build }}
+#
+#  emscripten:
+#    runs-on: ubuntu-latest
+#
+#    strategy:
+#      matrix:
+#        build: [Release]
+#
+#    steps:
+#      - name: Clone
+#        uses: actions/checkout@v4
+#
+#      - name: Dependencies
+#        run: |
+#          wget -q https://github.com/emscripten-core/emsdk/archive/master.tar.gz
+#          tar -xvf master.tar.gz
+#          emsdk-master/emsdk update
+#          emsdk-master/emsdk install latest
+#          emsdk-master/emsdk activate latest
+#
+#      - name: Configure
+#        run: echo "tmp"
+#
+#      - name: Build
+#        run: |
+#          pushd emsdk-master
+#          source ./emsdk_env.sh
+#          popd
+#          emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
+#          make
         
     | 
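The disabled OpenBLAS job above predates the current CMake options: `LLAMA_SUPPORT_OPENBLAS` is not a flag in today's tree, where BLAS backends are selected through ggml's build options. Below is a minimal sketch of what a revived job could look like; the `GGML_BLAS`/`GGML_BLAS_VENDOR` options, the vcpkg install step, and the `VCPKG_INSTALLATION_ROOT` variable are assumptions to verify against the current build documentation, not part of this commit.

  # Hypothetical job name; not part of the committed workflow.
  windows-openblas:
    runs-on: windows-latest
    steps:
      - name: Clone
        uses: actions/checkout@v4

      - name: Install OpenBLAS
        # assumption: vcpkg ships preinstalled on windows-latest runners
        run: vcpkg install openblas --triplet x64-windows

      - name: Configure
        run: >
          cmake -S . -B build
          -DCMAKE_TOOLCHAIN_FILE="$env:VCPKG_INSTALLATION_ROOT/scripts/buildsystems/vcpkg.cmake"
          -DGGML_BLAS=ON
          -DGGML_BLAS_VENDOR=OpenBLAS

      - name: Build
        run: cmake --build build --config Release

Driving the build through `cmake --build` keeps the job generator-agnostic, avoiding the direct `msbuild ALL_BUILD.vcxproj` invocation used in the commented-out version.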
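The emscripten job was likewise never finished; its Configure step is a literal `echo "tmp"`. A sketch of a complete WebAssembly build with the same emsdk flow follows, hedged because the wasm-facing targets of the current tree are not exercised by this commit.

  # Hypothetical replacement for the commented-out emscripten job above.
  emscripten:
    runs-on: ubuntu-latest
    steps:
      - name: Clone
        uses: actions/checkout@v4

      - name: Dependencies
        run: |
          git clone https://github.com/emscripten-core/emsdk
          ./emsdk/emsdk install latest
          ./emsdk/emsdk activate latest

      - name: Build
        run: |
          source ./emsdk/emsdk_env.sh
          # emcmake injects the Emscripten toolchain file into the configure step
          emcmake cmake -S . -B build-wasm -DCMAKE_BUILD_TYPE=Release
          cmake --build build-wasm

Cloning emsdk with git instead of unpacking master.tar.gz yields the same layout without the unversioned tarball download.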
    	
llama.cpp/.github/workflows/close-issue.yml
ADDED
@@ -0,0 +1,28 @@
+name: Close inactive issues
+on:
+  schedule:
+    - cron: "42 0 * * *"
+
+# Fine-grained permission
+# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
+permissions:
+  issues: write
+
+jobs:
+  close-issues:
+    runs-on: ubuntu-latest
+    permissions:
+      issues: write
+      pull-requests: write
+    steps:
+      - uses: actions/stale@v5
+        with:
+          exempt-issue-labels: "refactor,help wanted,good first issue,research,bug"
+          days-before-issue-stale: 30
+          days-before-issue-close: 14
+          stale-issue-label: "stale"
+          close-issue-message: "This issue was closed because it has been inactive for 14 days since being marked as stale."
+          days-before-pr-stale: -1
+          days-before-pr-close: -1
+          operations-per-run: 10000
+          repo-token: ${{ secrets.GITHUB_TOKEN }}
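As configured above, an issue is labeled `stale` after 30 days without activity and closed 14 days later unless one of the exempt labels applies; the `-1` values disable the same treatment for pull requests, and the `permissions` blocks keep the automatic `GITHUB_TOKEN` scoped to issue and PR writes. To rehearse the policy without mutating anything, the stale action ships a dry-run switch; a minimal sketch, assuming the `debug-only` input behaves in the pinned v5 as documented upstream:

  jobs:
    dry-run:
      runs-on: ubuntu-latest
      permissions:
        issues: write
        pull-requests: write
      steps:
        - uses: actions/stale@v5
          with:
            debug-only: true                # log what would happen; perform no writes
            exempt-issue-labels: "refactor,help wanted,good first issue,research,bug"
            days-before-issue-stale: 30
            days-before-issue-close: 14
            days-before-pr-stale: -1        # -1 turns PR processing off entirely
            days-before-pr-close: -1
            repo-token: ${{ secrets.GITHUB_TOKEN }}

Running such a variant once on a manual `workflow_dispatch` trigger makes it easy to inspect the log before relying on the cron schedule.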