File size: 1,144 Bytes
c19ca42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import os
import psutil
import torch
from modules import shared, errors

fail_once = False

def memory_stats():
    global fail_once # pylint: disable=global-statement
    def gb(val: float):
        return round(val / 1024 / 1024 / 1024, 2)

    mem = {}
    try:
        process = psutil.Process(os.getpid())
        res = process.memory_info()
        ram_total = 100 * res.rss / process.memory_percent()
        ram = { 'used': gb(res.rss), 'total': gb(ram_total) }
        mem.update({ 'ram': ram })
    except Exception as e:
        if not fail_once:
            shared.log.error('Memory stats: {e}')
            errors.display(e, 'Memory stats')
            fail_once = True
        mem.update({ 'ram': str(e) })
    try:
        s = torch.cuda.mem_get_info()
        gpu = { 'used': gb(s[1] - s[0]), 'total': gb(s[1]) }
        s = dict(torch.cuda.memory_stats())
        if s['num_ooms'] > 0:
            shared.state.oom = True
        mem.update({
            'gpu': gpu,
            'retries': s['num_alloc_retries'],
            'oom': s['num_ooms']
        })
        return mem
    except Exception:
        pass
    return mem