Gum script for Koboldcpp
Browse files- Scripts/kobold-server.sh +45 -0
Scripts/kobold-server.sh
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env bash
#
# Interactive launcher for koboldcpp using gum.
#
# Requirements:
#   - the `gum` package (charmbracelet/gum) for the interactive prompts
#   - conda with an environment named "kobold"
#   - koboldcpp built from source; assumes an NVIDIA GPU (cuBLAS)
#
# USER variables below (KOBOLD_DIR, MODEL_FOLDER_DIR, ...) must be
# changed to match your setup.

set -euo pipefail

# Activate the conda env named "kobold".
eval "$(conda shell.bash hook)"
conda activate kobold
python -V

# --- USER variables: CHANGE these for your setup ---
KOBOLD_DIR=~/Git/koboldcpp/
MODEL_FOLDER_DIR=~/Downloads/GGUF
API_PORT=8001
API_HOST=192.168.1.20
BLAS_THREADS=16
THREADS=8

# cd to the koboldcpp dir; abort rather than run from the wrong place.
cd "$KOBOLD_DIR" || { echo "cannot cd to $KOBOLD_DIR" >&2; exit 1; }

# List models via a glob (not `ls`) so names with spaces survive intact.
echo "Select Model"
shopt -s nullglob
models=()
for entry in "$MODEL_FOLDER_DIR"/*; do
  models+=("${entry##*/}")
done
(( ${#models[@]} > 0 )) || { echo "no models found in $MODEL_FOLDER_DIR" >&2; exit 1; }
MODEL=$(gum choose "${models[@]}")
# uncomment if you use sharded models. It will take the first file in a dir and load it.
#SHARDED_MODEL=$(ls -p "$MODEL_FOLDER_DIR/$MODEL" | grep -v / | head -1)
#MODEL=$MODEL/$SHARDED_MODEL
echo "$MODEL has been selected"

echo "Layers to Offload"
LAYERS=$(gum input --placeholder "99")
echo "$LAYERS layers have been offloaded"

echo "Context Size"
CONTEXT=$(gum choose "4096" "8192" "12288" "16384" "32768")
echo "Using a context size of $CONTEXT"

# Combined user flags — an array so each flag/value (including a model
# path containing spaces) stays a single argument.
USER_FLAGS=(
  --host "$API_HOST"
  --port "$API_PORT"
  --flashattention
  --blasbatchsize 2048
  --threads "$THREADS"
  --blasthreads "$BLAS_THREADS"
  --usecublas normal
  --contextsize "$CONTEXT"
  --gpulayers "$LAYERS"
  --model "$MODEL_FOLDER_DIR/$MODEL"
)

# Run koboldcpp.
python koboldcpp.py --skiplauncher "${USER_FLAGS[@]}"
|