Spaces:
Runtime error
Runtime error
{ | |
lib, | |
glibc, | |
config, | |
stdenv, | |
runCommand, | |
cmake, | |
ninja, | |
pkg-config, | |
git, | |
mpi, | |
blas, | |
cudaPackages, | |
autoAddDriverRunpath, | |
darwin, | |
rocmPackages, | |
vulkan-headers, | |
vulkan-loader, | |
curl, | |
shaderc, | |
useBlas ? | |
builtins.all (x: !x) [ | |
useCuda | |
useMetalKit | |
useRocm | |
useVulkan | |
] | |
&& blas.meta.available, | |
useCuda ? config.cudaSupport, | |
useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin, | |
# Increases the runtime closure size by ~700M | |
useMpi ? false, | |
useRocm ? config.rocmSupport, | |
enableCurl ? true, | |
useVulkan ? false, | |
llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake | |
# It's necessary to consistently use backendStdenv when building with CUDA support, | |
# otherwise we get libstdc++ errors downstream. | |
effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv, | |
enableStatic ? effectiveStdenv.hostPlatform.isStatic, | |
precompileMetalShaders ? false, | |
}: | |
let | |
inherit (lib) | |
cmakeBool | |
cmakeFeature | |
optionals | |
strings | |
; | |
stdenv = throw "Use effectiveStdenv instead"; | |
suffices = | |
lib.optionals useBlas [ "BLAS" ] | |
++ lib.optionals useCuda [ "CUDA" ] | |
++ lib.optionals useMetalKit [ "MetalKit" ] | |
++ lib.optionals useMpi [ "MPI" ] | |
++ lib.optionals useRocm [ "ROCm" ] | |
++ lib.optionals useVulkan [ "Vulkan" ]; | |
pnameSuffix = | |
strings.optionalString (suffices != [ ]) | |
"-${strings.concatMapStringsSep "-" strings.toLower suffices}"; | |
descriptionSuffix = strings.optionalString ( | |
suffices != [ ] | |
) ", accelerated with ${strings.concatStringsSep ", " suffices}"; | |
xcrunHost = runCommand "xcrunHost" { } '' | |
mkdir -p $out/bin | |
ln -s /usr/bin/xcrun $out/bin | |
''; | |
# apple_sdk is supposed to choose sane defaults, no need to handle isAarch64 | |
# separately | |
darwinBuildInputs = | |
with darwin.apple_sdk.frameworks; | |
[ | |
Accelerate | |
CoreVideo | |
CoreGraphics | |
] | |
++ optionals useMetalKit [ MetalKit ]; | |
cudaBuildInputs = with cudaPackages; [ | |
cuda_cudart | |
cuda_cccl # <nv/target> | |
libcublas | |
]; | |
rocmBuildInputs = with rocmPackages; [ | |
clr | |
hipblas | |
rocblas | |
]; | |
vulkanBuildInputs = [ | |
vulkan-headers | |
vulkan-loader | |
shaderc | |
]; | |
in | |
effectiveStdenv.mkDerivation (finalAttrs: { | |
pname = "llama-cpp${pnameSuffix}"; | |
version = llamaVersion; | |
# Note: none of the files discarded here are visible in the sandbox or | |
# affect the output hash. This also means they can be modified without | |
# triggering a rebuild. | |
src = lib.cleanSourceWith { | |
filter = | |
name: type: | |
let | |
noneOf = builtins.all (x: !x); | |
baseName = baseNameOf name; | |
in | |
noneOf [ | |
(lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths | |
(lib.hasSuffix ".md" name) # Ignore *.md changes whe computing outPaths | |
(lib.hasPrefix "." baseName) # Skip hidden files and directories | |
(baseName == "flake.lock") | |
]; | |
src = lib.cleanSource ../../.; | |
}; | |
postPatch = '' | |
substituteInPlace ./ggml/src/ggml-metal.m \ | |
--replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";" | |
substituteInPlace ./ggml/src/ggml-metal.m \ | |
--replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";" | |
''; | |
# With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015, | |
# `default.metallib` may be compiled with Metal compiler from XCode | |
# and we need to escape sandbox on MacOS to access Metal compiler. | |
# `xcrun` is used find the path of the Metal compiler, which is varible | |
# and not on $PATH | |
# see https://github.com/ggerganov/llama.cpp/pull/6118 for discussion | |
__noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders; | |
nativeBuildInputs = | |
[ | |
cmake | |
ninja | |
pkg-config | |
git | |
] | |
++ optionals useCuda [ | |
cudaPackages.cuda_nvcc | |
autoAddDriverRunpath | |
] | |
++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ] | |
++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ]; | |
buildInputs = | |
optionals effectiveStdenv.isDarwin darwinBuildInputs | |
++ optionals useCuda cudaBuildInputs | |
++ optionals useMpi [ mpi ] | |
++ optionals useRocm rocmBuildInputs | |
++ optionals useBlas [ blas ] | |
++ optionals useVulkan vulkanBuildInputs | |
++ optionals enableCurl [ curl ]; | |
cmakeFlags = | |
[ | |
(cmakeBool "LLAMA_BUILD_SERVER" true) | |
(cmakeBool "BUILD_SHARED_LIBS" (!enableStatic)) | |
(cmakeBool "CMAKE_SKIP_BUILD_RPATH" true) | |
(cmakeBool "LLAMA_CURL" enableCurl) | |
(cmakeBool "GGML_NATIVE" false) | |
(cmakeBool "GGML_BLAS" useBlas) | |
(cmakeBool "GGML_CUDA" useCuda) | |
(cmakeBool "GGML_HIPBLAS" useRocm) | |
(cmakeBool "GGML_METAL" useMetalKit) | |
(cmakeBool "GGML_VULKAN" useVulkan) | |
(cmakeBool "GGML_STATIC" enableStatic) | |
] | |
++ optionals useCuda [ | |
( | |
with cudaPackages.flags; | |
cmakeFeature "CMAKE_CUDA_ARCHITECTURES" ( | |
builtins.concatStringsSep ";" (map dropDot cudaCapabilities) | |
) | |
) | |
] | |
++ optionals useRocm [ | |
(cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang") | |
(cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets)) | |
] | |
++ optionals useMetalKit [ | |
(lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") | |
(cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders)) | |
]; | |
# Environment variables needed for ROCm | |
env = optionals useRocm { | |
ROCM_PATH = "${rocmPackages.clr}"; | |
HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode"; | |
}; | |
# TODO(SomeoneSerge): It's better to add proper install targets at the CMake level, | |
# if they haven't been added yet. | |
postInstall = '' | |
mkdir -p $out/include | |
cp $src/include/llama.h $out/include/ | |
''; | |
meta = { | |
# Configurations we don't want even the CI to evaluate. Results in the | |
# "unsupported platform" messages. This is mostly a no-op, because | |
# cudaPackages would've refused to evaluate anyway. | |
badPlatforms = optionals useCuda lib.platforms.darwin; | |
# Configurations that are known to result in build failures. Can be | |
# overridden by importing Nixpkgs with `allowBroken = true`. | |
broken = (useMetalKit && !effectiveStdenv.isDarwin); | |
description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}"; | |
homepage = "https://github.com/ggerganov/llama.cpp/"; | |
license = lib.licenses.mit; | |
# Accommodates `nix run` and `lib.getExe` | |
mainProgram = "llama-cli"; | |
# These people might respond, on the best effort basis, if you ping them | |
# in case of Nix-specific regressions or for reviewing Nix-specific PRs. | |
# Consider adding yourself to this list if you want to ensure this flake | |
# stays maintained and you're willing to invest your time. Do not add | |
# other people without their consent. Consider removing people after | |
# they've been unreachable for long periods of time. | |
# Note that lib.maintainers is defined in Nixpkgs, but you may just add | |
# an attrset following the same format as in | |
# https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix | |
maintainers = with lib.maintainers; [ | |
philiptaron | |
SomeoneSerge | |
]; | |
# Extend `badPlatforms` instead | |
platforms = lib.platforms.all; | |
}; | |
}) | |