Yixin Liu committed on
Commit
3720a59
1 Parent(s): 8963c43
Files changed (1) hide show
  1. config.py +10 -0
config.py CHANGED
@@ -82,6 +82,7 @@ function update_device_idx {
82
  fi;
83
 
84
  # so all the conditions are satisfied, we can update the device idx and run the next experiment
 
85
  while true; do
86
  current_device_idx=$((current_device_idx+1))
87
  if [ $current_device_idx -ge ${#available_devices[@]} ]; then
@@ -93,9 +94,18 @@ function update_device_idx {
93
  useage=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i ${available_devices[$current_device_idx]})
94
  utilization=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i ${available_devices[$current_device_idx]})
95
 
 
96
  if [ $useage -ge $((total_gpu_memory-max_gpu_memory_gap)) ] || [ $utilization -ge $max_gpu_utilization ]; then
97
  echo "device ${available_devices[$current_device_idx]} is fully booked, try next one"
98
  sleep 3
 
 
 
 
 
 
 
 
99
  continue
100
  else
101
  break
 
82
  fi;
83
 
84
  # so all the conditions are satisfied, we can update the device idx and run the next experiment
85
+ cnt_longer_sleep=0
86
  while true; do
87
  current_device_idx=$((current_device_idx+1))
88
  if [ $current_device_idx -ge ${#available_devices[@]} ]; then
 
94
  useage=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i ${available_devices[$current_device_idx]})
95
  utilization=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i ${available_devices[$current_device_idx]})
96
 
97
+
98
  if [ $useage -ge $((total_gpu_memory-max_gpu_memory_gap)) ] || [ $utilization -ge $max_gpu_utilization ]; then
99
  echo "device ${available_devices[$current_device_idx]} is fully booked, try next one"
100
  sleep 3
101
+
102
+ # when cnt_longer_sleep mod $gpu_num == 0, we sleep longer
103
+ cnt_longer_sleep=$((cnt_longer_sleep+1))
104
+ cnt_longer_sleep=$(echo "$cnt_longer_sleep%${#available_devices[@]}" | bc)
105
+ if [ $cnt_longer_sleep -eq 0 ]; then
106
+ sleep 60
107
+ fi
108
+
109
  continue
110
  else
111
  break