# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import torch
import streamlit as st
from pyinstrument import Profiler
from typing import Dict
import pandas as pd


@st.cache_resource(max_entries=1, show_spinner=False)
def init_gpu_memory() -> Dict[int, int]:
    """
    Initializing CUDA occupies some memory on each GPU. This overhead can make
    it difficult to tell how much memory is actually used by the model itself.

    This function initializes CUDA on every visible device and measures that
    overhead, returning a mapping from device index to occupied bytes. The
    result is cached via ``st.cache_resource`` so the measurement happens only
    once per server process.
    """
    if not torch.cuda.is_available():
        return {}

    # Force CUDA context initialization on each GPU by allocating a tiny tensor.
    gpu_memory_overhead = {}
    for i in range(torch.cuda.device_count()):
        torch.ones(1).cuda(i)
        free, total = torch.cuda.mem_get_info(i)
        occupied = total - free
        gpu_memory_overhead[i] = occupied

    return gpu_memory_overhead
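
# A minimal sketch (not part of the module) of how the measured overhead can
# be used to estimate a model's true footprint. "load_model" is a hypothetical
# helper and the value in the comment is illustrative, not a real measurement:
#
#     overhead = init_gpu_memory()                # e.g. {0: 529530880}
#     model = load_model().cuda()                 # hypothetical model loader
#     free, total = torch.cuda.mem_get_info(0)
#     model_bytes = (total - free) - overhead[0]  # memory used by the model alone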


class SystemMonitor:
    """
    This class is used to monitor the system resources such as GPU memory and CPU
    usage. It uses the pyinstrument library to profile the code and measure the
    execution time of different parts of the code.
    """

    def __init__(
        self,
        enabled: bool = False,
    ):
        self.enabled = enabled
        self.profiler = Profiler()
        # Populated in __enter__; empty until the monitor is entered.
        self.overhead: Dict[int, int] = {}

    def __enter__(self):
        if not self.enabled:
            return self

        self.overhead = init_gpu_memory()

        self.profiler.__enter__()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        if not self.enabled:
            return

        self.profiler.__exit__(exc_type, exc_value, traceback)

        self.report_gpu_usage()
        self.report_profiler()

        with st.expander("Session state"):
            st.write(st.session_state)

        return None

    def report_gpu_usage(self):
        if not torch.cuda.is_available():
            return

        data = []

        for i in range(torch.cuda.device_count()):
            free, total = torch.cuda.mem_get_info(i)
            occupied = total - free
            data.append({
                "overhead": self.overhead[i],
                "occupied": occupied - self.overhead[i],
                "free": free,
            })
        df = pd.DataFrame(data, columns=["overhead", "occupied", "free"])

        with st.sidebar.expander("System"):
            st.write("GPU memory on server")
            df /= 1024 ** 3  # Convert bytes to GiB
            # One color per column: overhead, occupied, free.
            st.bar_chart(df, width=200, height=200, color=["#fefefe", "#84c9ff", "#fe2b2b"])

    def report_profiler(self):
        # Render pyinstrument's interactive HTML report inside the page.
        html_code = self.profiler.output_html()
        with st.expander("Profiler", expanded=False):
            st.components.v1.html(html_code, height=1000, scrolling=True)
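
# A minimal usage sketch, assuming this module is imported from a Streamlit
# app. "render_app" is a hypothetical stand-in for the code being profiled;
# with enabled=True, exiting the context renders per-GPU memory bars in the
# sidebar and the pyinstrument report in an expander:
#
#     with SystemMonitor(enabled=True):
#         render_app()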