various-settings / card-temperature-angel.sh
zaq-hack's picture
Update card-temperature-angel.sh
4859454 verified
#!/bin/bash
# This script runs in the background and checks the temp of my 3060 and 3090 cards.
# The concept is to cut the power limit when approaching a thermal limit. This "smooths out"
# the usage compared to native thermal throttling. This is useful for long sessions
# of Stable Diffusion or heavy chat models. You may need to tune for your own setup,
# temperature preferences, and so on. This is provided with ZERO WARRANTY, so your
# mileage may vary. In general, you should not do magic you do not understand.
# Temperature thresholds, compared against the nvidia-smi temperature reading:
# above tmax a card's power limit is lowered, at or below tmin it may be raised.
tmin=55
tmax=72

# Power-limit ceilings in watts: p0max for GPU index 0, p1max for GPU index 1.
p0max=170
p1max=370

# Current power limits; both cards start at their ceiling.
p0=$((p0max))
p1=$((p1max))

# Counter used by the loop below; it is reset to 0 whenever a limit is adjusted
# and compared against 10 to decide on a recovery step.
s=0

# Apply the starting limits to both cards.
sudo nvidia-smi -i 0 -pl "$p0"
sudo nvidia-smi -i 1 -pl "$p1"
# Main control loop; runs until the script is interrupted.
#
# Each pass shows the nvidia-smi summary, reads both card temperatures and
# nudges the power limits one watt at a time:
#   * hotter than tmax              -> lower that card's limit, re-check after 1 s
#   * at or below tmin, below max   -> raise that card's limit, re-check after 2 s
#   * more than 10 passes in a row with neither of the above
#                                   -> raise any limit still below its maximum
# p0/p1 are only updated when nvidia-smi reports success, so they keep
# tracking the limit that was actually accepted.
while true
do
  clear
  nvidia-smi
  zzz=10        # seconds to sleep at the end of this pass
  s=$((s + 1))  # consecutive passes without a temperature-driven adjustment

  # --- 3060: temperature is found by card name, the limit is set on GPU index 0 ---
  d0=$(nvidia-smi --query-gpu=temperature.gpu,gpu_name --format=csv,noheader | grep 3060)
  t0="${d0%%,*}"  # text before the first comma, so 1- and 3-digit readings parse too
  echo "3060 Temperature $t0. Present Watts $p0. Max Watts $p0max"
  p0action="3060 (=)"
  if [[ ! $t0 =~ ^[0-9]+$ ]]
  then
    # An empty reading would compare as 0 and push the limit up; hold instead,
    # and do not count this pass towards the recovery step.
    echo "3060 temperature unavailable; leaving power limit unchanged" >&2
    s=0
  else
    if (( t0 > tmax ))
    then
      if sudo nvidia-smi -i 0 -pl "$((p0 - 1))"
      then
        p0=$((p0 - 1))
        p0action="3060 (-)"
      fi
      zzz=1
      s=0
    fi
    if (( t0 <= tmin && p0 < p0max ))
    then
      if sudo nvidia-smi -i 0 -pl "$((p0 + 1))"
      then
        p0=$((p0 + 1))
        p0action="3060 (+)"
      fi
      zzz=2
      s=0
    fi
  fi

  # --- 3090: temperature is found by card name, the limit is set on GPU index 1 ---
  d1=$(nvidia-smi --query-gpu=temperature.gpu,gpu_name --format=csv,noheader | grep 3090)
  t1="${d1%%,*}"
  echo "3090 Temperature $t1. Present Watts $p1. Max Watts $p1max"
  p1action="3090 (=)"
  if [[ ! $t1 =~ ^[0-9]+$ ]]
  then
    echo "3090 temperature unavailable; leaving power limit unchanged" >&2
    s=0
  else
    if (( t1 > tmax ))
    then
      if sudo nvidia-smi -i 1 -pl "$((p1 - 1))"
      then
        p1=$((p1 - 1))
        p1action="3090 (-)"
      fi
      zzz=1
      s=0
    fi
    if (( t1 <= tmin && p1 < p1max ))
    then
      if sudo nvidia-smi -i 1 -pl "$((p1 + 1))"
      then
        p1=$((p1 + 1))
        p1action="3090 (+)"
      fi
      zzz=2
      s=0
    fi
  fi

  # Recovery step: after more than 10 quiet passes, give back one watt to any
  # card that is still running below its ceiling.
  if (( s > 10 ))
  then
    s=0
    if (( p0 < p0max )) && sudo nvidia-smi -i 0 -pl "$((p0 + 1))"
    then
      p0=$((p0 + 1))
    fi
    if (( p1 < p1max )) && sudo nvidia-smi -i 1 -pl "$((p1 + 1))"
    then
      p1=$((p1 + 1))
    fi
  fi

  echo "$p0action" "$p1action"
  sensors gigabyte_wmi-virtual-0
  sleep "$zzz"
done