File size: 2,877 Bytes
5bdad4f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/usr/bin/env bash

#################
# Parse options #
#################

# Print the command-line usage summary on stderr, then terminate the
# script with exit status 1.
instructions() {
  printf '%s\n' \
    "Usage: $0 [-n] [-c]" \
    " -n: number of servers to run" \
    " -c: path to the server configs" >&2
  exit 1
}

# Parse the command line.
#   -n <num>   number of servers to run                 -> NUM_SERVERS
#   -c <path>  directory holding the server config files -> CONFIG_PATH
# Both options are required and each takes a value, hence exactly four
# arguments. Any violation prints the usage text and exits (instructions).
if [ "$#" -ne 4 ]; then
    instructions
fi

# The leading ":" selects silent error reporting: an unknown option yields
# "?" and an option that is missing its value yields ":".
# The formerly declared "-t" had no handler, so it is now rejected like any
# other unknown option.
while getopts ":n:c:" option; do
    case "$option" in
        n)  NUM_SERVERS=${OPTARG}
            ;;
        c)  CONFIG_PATH=${OPTARG}
            ;;
        :)  instructions
            ;;
        \?) instructions
            ;;
    esac
done

# Four arguments do not guarantee that both options were seen (for example
# "-n 1 -n 2" or four plain words), so verify the results explicitly.
# NUM_SERVERS is used as a loop bound later and must be a non-negative integer.
case "${NUM_SERVERS:-}" in
    '' | *[!0-9]*) instructions ;;
esac
if [ -z "${CONFIG_PATH:-}" ]; then
    instructions
fi


###########################
# Install or activate env #
###########################

# Activate the "bloom-demo" conda environment, creating and provisioning it
# first when it does not exist yet.

# Load conda's shell integration so that `conda activate` works in this script.
source ~/miniconda3/etc/profile.d/conda.sh

ENV_NAME="bloom-demo"

# Match the environment name as a complete first field. A plain substring
# match would also accept names such as "bloom-demo-old" (or a path that
# merely contains the name), skip the creation branch, and then fail to
# activate the environment.
if conda env list | grep -E "^${ENV_NAME}([[:space:]]|\$)" &>/dev/null; then
    conda activate "${ENV_NAME}"
else
    conda create -y --name "${ENV_NAME}" python=3.8.12 pip
    conda activate "${ENV_NAME}"

    # CUDA 11.3 toolchain followed by the pinned Python dependencies.
    conda install -y -c conda-forge cudatoolkit-dev==11.3.1 cudatoolkit==11.3.1 cudnn==8.2.1.32
    pip install -i https://pypi.org/simple torch==1.11.0+cu113 torchvision==0.12.0+cu113 -f https://download.pytorch.org/whl/torch_stable.html
    pip install -i https://pypi.org/simple accelerate==0.10.0 huggingface-hub==0.7.0 hivemind==1.1.0
    pip install -i https://pypi.org/simple bitsandbytes-cuda113==0.26.0
    # transformers is installed from a pinned commit archive, not a release.
    pip install -i https://pypi.org/simple https://github.com/huggingface/transformers/archive/6589e510fa4e6c442059de2fab84752535de9b23.zip
fi


#######################
# Create Initial peer #
#######################

# Start a background hivemind-dht process, logging to tmp.out, and take the
# address servers should connect to from its output: the last
# whitespace-separated token on the second line of the log.
hivemind-dht &> tmp.out &
DHT_PID=$!

# A single fixed sleep races with DHT start-up: if the second line has not
# been written yet, the extraction fails and the script would carry on with
# an empty peer address. Poll every 3 seconds (up to 30 seconds in total)
# until the log holds two complete lines.
INITIAL_PEER=""
for _attempt in 1 2 3 4 5 6 7 8 9 10; do
    sleep 3
    # wc -l counts newline characters, so >= 2 means line 2 is complete.
    if (( $(wc -l < tmp.out) >= 2 )); then
        INITIAL_PEER=$(python -c "with open('tmp.out') as f: print(f.readlines()[1].split()[-1])" )
        break
    fi
done

# Without a peer address every server launch would be malformed; stop here
# and clean up the background process and its log.
if [ -z "${INITIAL_PEER}" ]; then
    echo "Failed to read the initial peer address from hivemind-dht output" >&2
    kill "${DHT_PID}" 2>/dev/null
    rm -f tmp.out
    exit 1
fi
echo "Initial peer: ${INITIAL_PEER}"


##############################
# Initialize the config file #
##############################

# Associative array of server settings, pre-filled with default values.
# Entries are overwritten by matching "key=value" lines read from the
# server config files.
declare -A cfg=(
    [maddr]='/ip4/127.0.0.1/tcp/30000'
    [id_path]='server.id'
    [block_ids]='1:2'
    [device]='cpu'
)

###############
# Run servers #
###############

# For every server index 0 .. NUM_SERVERS-1: read that server's config file
# into cfg, print a summary, and start the server in its own detached tmux
# session named "Server_<index>".
for (( SERVER_ID = 0; SERVER_ID < NUM_SERVERS; SERVER_ID++ )); do
    ###############
    # Read config #
    ###############

    # Each "key=value" line overrides the matching cfg entry. Only the first
    # "=" separates key from value, so values may themselves contain "=".
    # Lines without "=" are ignored.
    #   - read -r keeps backslashes literal; surrounding whitespace is still
    #     trimmed by the default IFS.
    #   - "|| [[ -n ... ]]" also processes a final line that lacks a newline.
    # NOTE(review): cfg is not reset between iterations, so a key set by an
    # earlier server's file stays in effect for later servers whose file omits
    # it. Confirm that this inheritance is intended.
    while read -r line || [[ -n "${line}" ]]; do
        if [[ "${line}" == *=* ]]; then
            varname=${line%%=*}
            # An empty key is not a valid associative-array subscript.
            if [[ -n "${varname}" ]]; then
                cfg["${varname}"]=${line#*=}
            fi
        fi
    done < "${CONFIG_PATH}/server_${SERVER_ID}.cfg"

    echo "=== Server #${SERVER_ID} ==="
    # Previously printed the undefined variable ${id_path}, i.e. always empty.
    echo "Server ID: ${cfg[id_path]}"
    echo "Device: ${cfg[device]}"
    echo "Bloom block ids: ${cfg[block_ids]}"
    echo "Host maddr: ${cfg[maddr]}"
    echo ""

    ##############
    # Run server #
    ##############

    # Arguments are quoted so each value reaches deploy_server.sh as exactly
    # one argument, even if it contains whitespace or glob characters.
    tmux new-session -d -s "Server_${SERVER_ID}" \
        bash cli/deploy_server.sh \
        -i "${INITIAL_PEER}" \
        -d "${cfg[device]}" \
        -p "${cfg[id_path]}" \
        -b "${cfg[block_ids]}" \
        -a "${cfg[maddr]}"
done


#####################
# Kill initial peer #
#####################

# Wait before shutting the initial peer down (presumably so the launched
# servers have time to connect to it first -- confirm), then remove its log.
PEER_GRACE_SECONDS=10
sleep "${PEER_GRACE_SECONDS}"

# Terminates every process whose command line matches "hivemind-dht".
pkill -f hivemind-dht # TODO: kill only particular pids of hivemind-dht

rm tmp.out