Spaces:
Runtime error
Runtime error
# pynvml is an optional dependency: when it cannot be imported, GPU
# monitoring is switched off instead of breaking the import of this module.
try:
    import pynvml as nv
    nvml_ok = True
except ImportError:
    nvml_ok = False

# Flipped to True by get_nvml() once nv.nvmlInit() has been called.
nvml_initialized = False
def get_reason(val):
    """Translate a clock-throttle bitmask into human-readable text.

    Args:
        val: Integer bitmask. Every bit that matches a key of the table
            below contributes its label to the result.

    Returns:
        The labels of all matching bits joined with ', ' (in ascending bit
        order), or 'ok' when no known bit is set.
    """
    throttle = {
        1: 'gpu idle',
        2: 'applications clocks setting',
        4: 'sw power cap',
        8: 'hw slowdown',
        16: 'sync boost',
        32: 'sw thermal slowdown',
        64: 'hw thermal slowdown',
        128: 'hw power brake slowdown',
        256: 'display clock setting',
    }
    reasons = [label for bit, label in throttle.items() if bit & val]
    return ', '.join(reasons) if reasons else 'ok'
def get_nvml():
    """Collect a status snapshot for every GPU reported by NVML.

    NVML is initialized on the first call. If pynvml is unavailable, or any
    NVML query raises, an empty list is returned and ``nvml_ok`` is cleared
    so that later calls return immediately without touching NVML again.

    Returns:
        A list with one dict per device, holding the keys 'name', 'version',
        'pci', 'memory', 'clock', 'load', 'power' and 'state'; an empty list
        when NVML is unavailable or a query failed.
    """
    global nvml_initialized  # pylint: disable=global-statement
    global nvml_ok  # pylint: disable=global-statement
    if not nvml_ok:
        return []
    try:
        if not nvml_initialized:
            nv.nvmlInit()
            # Only record initialization once nvmlInit() has succeeded.
            nvml_initialized = True
        # System-wide values do not depend on the device; query them once.
        cuda_version = nv.nvmlSystemGetCudaDriverVersion()
        driver_version = nv.nvmlSystemGetDriverVersion()
        devices = []
        for i in range(nv.nvmlDeviceGetCount()):
            dev = nv.nvmlDeviceGetHandleByIndex(i)
            # Query each structure once so the fields derived from it come
            # from the same snapshot (e.g. total/free/used memory agree).
            pci = nv.nvmlDeviceGetPciInfo(dev)
            mem = nv.nvmlDeviceGetMemoryInfo(dev)
            util = nv.nvmlDeviceGetUtilizationRates(dev)
            device = {
                'name': nv.nvmlDeviceGetName(dev),
                'version': {
                    'cuda': cuda_version,
                    'driver': driver_version,
                    'vbios': nv.nvmlDeviceGetVbiosVersion(dev),
                    'rom': nv.nvmlDeviceGetInforomImageVersion(dev),
                    'capabilities': nv.nvmlDeviceGetCudaComputeCapability(dev),
                },
                'pci': {
                    'link': nv.nvmlDeviceGetCurrPcieLinkGeneration(dev),
                    'width': nv.nvmlDeviceGetCurrPcieLinkWidth(dev),
                    'busid': pci.busId,
                    'deviceid': pci.pciDeviceId,
                },
                'memory': {  # raw values divided by 1024*1024
                    'total': round(mem.total / 1024 / 1024, 2),
                    'free': round(mem.free / 1024 / 1024, 2),
                    'used': round(mem.used / 1024 / 1024, 2),
                },
                'clock': {  # [current, max]; clock type ids 0/1/2 = gpu, sm, memory
                    'gpu': [nv.nvmlDeviceGetClockInfo(dev, 0), nv.nvmlDeviceGetMaxClockInfo(dev, 0)],
                    'sm': [nv.nvmlDeviceGetClockInfo(dev, 1), nv.nvmlDeviceGetMaxClockInfo(dev, 1)],
                    'memory': [nv.nvmlDeviceGetClockInfo(dev, 2), nv.nvmlDeviceGetMaxClockInfo(dev, 2)],
                },
                'load': {
                    'gpu': round(util.gpu),
                    'memory': round(util.memory),
                    'temp': nv.nvmlDeviceGetTemperature(dev, 0),  # sensor id 0
                    'fan': nv.nvmlDeviceGetFanSpeed(dev),
                },
                # [current usage, enforced limit], raw values divided by 1000
                'power': [
                    round(nv.nvmlDeviceGetPowerUsage(dev) / 1000, 2),
                    round(nv.nvmlDeviceGetEnforcedPowerLimit(dev) / 1000, 2),
                ],
                'state': get_reason(nv.nvmlDeviceGetCurrentClocksThrottleReasons(dev)),
            }
            devices.append(device)
        # log.debug(f'nmvl: {devices}')
        return devices
    except Exception:
        # Deliberate best-effort: any failure disables NVML polling for the
        # rest of the process rather than raising into the caller.
        # log.debug(f'nvml failed: {e}')
        nvml_ok = False
        return []