File size: 1,446 Bytes
47eaf19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/bin/bash
#
# Download the sharded DeepSeek-V2-Chat Q2_K GGUF model and the matching
# llama.cpp release binaries, then launch the llama.cpp server with the
# metadata overrides DeepSeek-V2 requires.
#
# Requires: wget, unzip, network access to huggingface.co and github.com.
set -euo pipefail

# Create and enter the working directory; '|| exit' guards against a
# failed cd sending the multi-GB downloads into the current directory.
mkdir -p DeepSeek-V2-Chat.Q2_K.gguf
cd DeepSeek-V2-Chat.Q2_K.gguf || exit 1

# Download the five GGUF shards (…-00001-of-00005 through …-00005-of-00005).
for i in {1..5}; do
    wget "https://huggingface.co/leafspark/DeepSeek-V2-Chat-GGUF/resolve/main/DeepSeek-V2-Chat.q2_k.gguf/DeepSeek-V2-Chat.Q2_K-0000${i}-of-00005.gguf?download=true" \
        -O "DeepSeek-V2-Chat.Q2_K-0000${i}-of-00005.gguf"
done

# Download the llama.cpp release binaries (build b2961) matching the host
# OS and, on macOS, the CPU architecture.
case "$(uname -s)" in
    Linux)
        wget https://github.com/ggerganov/llama.cpp/releases/download/b2961/llama-b2961-bin-ubuntu-x64.zip
        unzip llama-b2961-bin-ubuntu-x64.zip -d .
        ;;
    Darwin)
        # Apple Silicon and Intel Macs need different builds.
        if [[ "$(uname -m)" == 'arm64' ]]; then
            wget https://github.com/ggerganov/llama.cpp/releases/download/b2961/llama-b2961-bin-macos-arm64.zip
            unzip llama-b2961-bin-macos-arm64.zip -d .
        else
            wget https://github.com/ggerganov/llama.cpp/releases/download/b2961/llama-b2961-bin-macos-x64.zip
            unzip llama-b2961-bin-macos-x64.zip -d .
        fi
        ;;
    *)
        # Fail loudly instead of falling through and crashing later at ./server.
        echo "Unsupported OS: $(uname -s) (prebuilt llama.cpp binaries cover Linux and macOS only)" >&2
        exit 1
        ;;
esac

# Launch the llama.cpp server.
#
# Every option must end with a backslash so the whole invocation is one
# logical command line — the original dropped the continuations after
# --mlock, which started the server without any --override-kv settings
# and then tried to run each override line as a separate shell command.
#
# -m points at the FIRST shard: llama.cpp discovers the remaining splits
# from it (the original referenced DeepSeek-V2-Chat.q2_k.gguf, a file
# the download loop never creates).
./server \
  -m DeepSeek-V2-Chat.Q2_K-00001-of-00005.gguf \
  -c 4096 \
  -i \
  --mlock \
  --override-kv deepseek2.attention.q_lora_rank=int:1536 \
  --override-kv deepseek2.attention.kv_lora_rank=int:512 \
  --override-kv deepseek2.expert_shared_count=int:2 \
  --override-kv deepseek2.expert_feed_forward_length=int:1536 \
  --override-kv deepseek2.leading_dense_block_count=int:1