# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
from typing import Dict

import pandas as pd
import streamlit as st
import torch
from pyinstrument import Profiler


@st.cache_resource(max_entries=1, show_spinner=False)
def init_gpu_memory():
"""
When CUDA is initialized, it occupies some memory on the GPU thus this overhead
can sometimes make it difficult to understand how much memory is actually used by
the model.
This function is used to initialize CUDA and measure the overhead.
"""
if not torch.cuda.is_available():
return {}
    # Initialize the CUDA context on each device so its overhead can be measured.
gpu_memory_overhead = {}
for i in range(torch.cuda.device_count()):
torch.ones(1).cuda(i)
free, total = torch.cuda.mem_get_info(i)
occupied = total - free
gpu_memory_overhead[i] = occupied
return gpu_memory_overhead
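

# Illustrative sketch (not part of the original module): the overhead measured by
# init_gpu_memory can be subtracted from the currently occupied memory to
# estimate how much the model itself uses, e.g.
#
#     overhead = init_gpu_memory()                  # e.g. {0: 529_530_880}
#     free, total = torch.cuda.mem_get_info(0)
#     model_bytes = (total - free) - overhead[0]    # memory attributable to the model
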
class SystemMonitor:
"""
This class is used to monitor the system resources such as GPU memory and CPU
usage. It uses the pyinstrument library to profile the code and measure the
execution time of different parts of the code.
"""
def __init__(
self,
enabled: bool = False,
):
self.enabled = enabled
self.profiler = Profiler()
        self.overhead: Dict[int, int]  # per-device CUDA context overhead, set in __enter__
def __enter__(self):
if not self.enabled:
return
self.overhead = init_gpu_memory()
self.profiler.__enter__()
def __exit__(self, exc_type, exc_value, traceback):
if not self.enabled:
return
self.profiler.__exit__(exc_type, exc_value, traceback)
self.report_gpu_usage()
self.report_profiler()
with st.expander("Session state"):
st.write(st.session_state)
return None
def report_gpu_usage(self):
if not torch.cuda.is_available():
return
data = []
for i in range(torch.cuda.device_count()):
free, total = torch.cuda.mem_get_info(i)
occupied = total - free
data.append({
'overhead': self.overhead[i],
'occupied': occupied - self.overhead[i],
'free': free,
})
df = pd.DataFrame(data, columns=["overhead", "occupied", "free"])
with st.sidebar.expander("System"):
st.write("GPU memory on server")
            df /= 1024 ** 3  # Convert bytes to GiB
st.bar_chart(df, width=200, height=200, color=["#fefefe", "#84c9ff", "#fe2b2b"])
def report_profiler(self):
html_code = self.profiler.output_html()
with st.expander("Profiler", expanded=False):
st.components.v1.html(html_code, height=1000, scrolling=True)
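

# Example usage (an illustrative sketch, not part of the original module): the
# monitor is meant to wrap a Streamlit render pass as a context manager. On exit
# it draws the per-GPU memory bar chart in the sidebar and shows the pyinstrument
# report in an expander.
#
#     monitor = SystemMonitor(enabled=True)
#     with monitor:
#         render_app()  # hypothetical function containing the app's Streamlit code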