tool: add linux autorun (no CUDA, ubuntu)
DeepSeek-V2-Chat.q2_k.gguf/download_and_run.sh
ADDED
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+# Create the model directory and move into it
+mkdir -p DeepSeek-V2-Chat.Q2_K.gguf
+cd DeepSeek-V2-Chat.Q2_K.gguf || exit 1
+
+# Download the five GGUF splits
+for i in {1..5}; do
+  wget "https://huggingface.co/leafspark/DeepSeek-V2-Chat-GGUF/resolve/main/DeepSeek-V2-Chat.q2_k.gguf/DeepSeek-V2-Chat.Q2_K-0000$i-of-00005.gguf?download=true" -O "DeepSeek-V2-Chat.Q2_K-0000$i-of-00005.gguf"
+done
+
+# Download the llama.cpp binaries based on the OS
+case "$(uname -s)" in
+  Linux)
+    wget https://github.com/ggerganov/llama.cpp/releases/download/b2961/llama-b2961-bin-ubuntu-x64.zip
+    unzip llama-b2961-bin-ubuntu-x64.zip -d .
+    ;;
+  Darwin)
+    if [[ $(uname -m) == 'arm64' ]]; then
+      wget https://github.com/ggerganov/llama.cpp/releases/download/b2961/llama-b2961-bin-macos-arm64.zip
+      unzip llama-b2961-bin-macos-arm64.zip -d .
+    else
+      wget https://github.com/ggerganov/llama.cpp/releases/download/b2961/llama-b2961-bin-macos-x64.zip
+      unzip llama-b2961-bin-macos-x64.zip -d .
+    fi
+    ;;
+  *)
+    echo "Unsupported OS: $(uname -s)" >&2
+    exit 1
+    ;;
+esac
+
+# Start the server; -m points at the first split and llama.cpp picks up
+# the remaining splits automatically
+./server \
+  -m DeepSeek-V2-Chat.Q2_K-00001-of-00005.gguf \
+  -c 4096 \
+  --mlock \
+  --override-kv deepseek2.attention.q_lora_rank=int:1536 \
+  --override-kv deepseek2.attention.kv_lora_rank=int:512 \
+  --override-kv deepseek2.expert_shared_count=int:2 \
+  --override-kv deepseek2.expert_feed_forward_length=int:1536 \
+  --override-kv deepseek2.leading_dense_block_count=int:1
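As written, a dropped connection restarts a multi-gigabyte shard from zero. A hedged variant of the same download loop (same URLs and filenames as the script above), using wget's -c resume and --tries retry flags:

for i in {1..5}; do
  # -c resumes a partial file instead of rewriting it; --tries retries transient failures
  wget -c --tries=5 \
    "https://huggingface.co/leafspark/DeepSeek-V2-Chat-GGUF/resolve/main/DeepSeek-V2-Chat.q2_k.gguf/DeepSeek-V2-Chat.Q2_K-0000$i-of-00005.gguf?download=true" \
    -O "DeepSeek-V2-Chat.Q2_K-0000$i-of-00005.gguf"
done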
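Loading the first split, as the script does, is enough, but the splits can also be merged into a single file with llama.cpp's gguf-split tool. A hedged sketch, assuming gguf-split was unpacked alongside server from the release zip; the output filename is arbitrary:

# Optional: merge the five splits into one GGUF, then pass the merged
# file to -m instead of the first split
./gguf-split --merge \
  DeepSeek-V2-Chat.Q2_K-00001-of-00005.gguf \
  DeepSeek-V2-Chat.Q2_K.merged.gguf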
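Once ./server is up, a quick HTTP request confirms the model actually loaded. A minimal smoke test, assuming the server's default 127.0.0.1:8080 bind and its /completion endpoint:

# Ask for a short completion; an error here usually means the model is
# still loading or failed to load
curl -s http://127.0.0.1:8080/completion \
  -H 'Content-Type: application/json' \
  -d '{"prompt": "Hello, ", "n_predict": 32}'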