Spaces:

zetavg
/

LLaMA-LoRA-Tuner-UI-Demo

Runtime error

LLaMA-LoRA-Tuner-UI-Demo / llama_lora /globals.py

zetavg

finetune loss chart: use steps as the x axis if possible

6947876 unverified over 1 year ago

5.95 kB

	import importlib
	import os
	import subprocess
	import psutil
	import math

	from typing import Any, Dict, List, Optional, Tuple, Union
	from transformers import TrainingArguments
	from numba import cuda
	import nvidia_smi

	from .dynamic_import import dynamic_import
	from .config import Config
	from .utils.lru_cache import LRUCache
	from .utils.eta_predictor import ETAPredictor


	class Global:
	"""
	A singleton class holding global states.
	"""

	version: Union[str, None] = None

	base_model_name: str = ""
	tokenizer_name: Union[str, None] = None

	# Functions
	inference_generate_fn: Any
	finetune_train_fn: Any

	# Training Control
	should_stop_training: bool = False

	# Training Status
	is_train_starting: bool = False
	is_training: bool = False
	train_started_at: float = 0.0
	training_error_message: Union[str, None] = None
	training_error_detail: Union[str, None] = None
	training_total_epochs: int = 0
	training_current_epoch: float = 0.0
	training_total_steps: int = 0
	training_current_step: int = 0
	training_progress: float = 0.0
	training_log_history: List[Any] = []
	training_status_text: str = ""
	training_eta_predictor = ETAPredictor()
	training_eta: Union[int, None] = None
	training_args: Union[TrainingArguments, None] = None
	train_output: Union[None, Any] = None
	train_output_str: Union[None, str] = None
	training_params_info_text: str = ""

	# Generation Control
	should_stop_generating: bool = False
	generation_force_stopped_at: Union[float, None] = None

	# Model related
	loaded_models = LRUCache(1)
	loaded_tokenizers = LRUCache(1)
	new_base_model_that_is_ready_to_be_used = None
	name_of_new_base_model_that_is_ready_to_be_used = None

	# GPU Info
	gpu_cc = None # GPU compute capability
	gpu_sms = None # GPU total number of SMs
	gpu_total_cores = None # GPU total cores
	gpu_total_memory = None


	def initialize_global():
	Global.base_model_name = Config.default_base_model_name
	commit_hash = get_git_commit_hash()

	if commit_hash:
	Global.version = commit_hash[:8]

	if not Config.ui_dev_mode:
	ModelLRUCache = dynamic_import('.utils.model_lru_cache').ModelLRUCache
	Global.loaded_models = ModelLRUCache(1)
	Global.inference_generate_fn = dynamic_import('.lib.inference').generate
	Global.finetune_train_fn = dynamic_import('.lib.finetune').train
	load_gpu_info()


	def get_package_dir():
	current_file_path = os.path.abspath(__file__)
	parent_directory_path = os.path.dirname(current_file_path)
	return os.path.abspath(parent_directory_path)


	def get_git_commit_hash():
	try:
	original_cwd = os.getcwd()
	project_dir = get_package_dir()
	try:
	os.chdir(project_dir)
	commit_hash = subprocess.check_output(
	['git', 'rev-parse', 'HEAD']).strip().decode('utf-8')
	return commit_hash
	except Exception as e:
	print(f"Cannot get git commit hash: {e}")
	finally:
	os.chdir(original_cwd)
	except Exception as e:
	print(f"Cannot get git commit hash: {e}")


	def load_gpu_info():
	# cuda = importlib.import_module('numba').cuda
	# nvidia_smi = importlib.import_module('nvidia_smi')
	print("")
	try:
	cc_cores_per_SM_dict = {
	(2, 0): 32,
	(2, 1): 48,
	(3, 0): 192,
	(3, 5): 192,
	(3, 7): 192,
	(5, 0): 128,
	(5, 2): 128,
	(6, 0): 64,
	(6, 1): 128,
	(7, 0): 64,
	(7, 5): 64,
	(8, 0): 64,
	(8, 6): 128,
	(8, 9): 128,
	(9, 0): 128
	}
	# the above dictionary should result in a value of "None" if a cc match
	# is not found. The dictionary needs to be extended as new devices become
	# available, and currently does not account for all Jetson devices
	device = cuda.get_current_device()
	device_sms = getattr(device, 'MULTIPROCESSOR_COUNT')
	device_cc = device.compute_capability
	cores_per_sm = cc_cores_per_SM_dict.get(device_cc)
	total_cores = cores_per_sm*device_sms
	print("GPU compute capability: ", device_cc)
	print("GPU total number of SMs: ", device_sms)
	print("GPU total cores: ", total_cores)
	Global.gpu_cc = device_cc
	Global.gpu_sms = device_sms
	Global.gpu_total_cores = total_cores

	nvidia_smi.nvmlInit()
	handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
	info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
	total_memory = info.total

	total_memory_mb = total_memory / (1024 ** 2)
	total_memory_gb = total_memory / (1024 ** 3)

	# Print the memory size
	print(
	f"GPU total memory: {total_memory} bytes ({total_memory_mb:.2f} MB) ({total_memory_gb:.2f} GB)")
	Global.gpu_total_memory = total_memory

	available_cpu_ram = psutil.virtual_memory().available
	available_cpu_ram_mb = available_cpu_ram / (1024 ** 2)
	available_cpu_ram_gb = available_cpu_ram / (1024 ** 3)
	print(
	f"CPU available memory: {available_cpu_ram} bytes ({available_cpu_ram_mb:.2f} MB) ({available_cpu_ram_gb:.2f} GB)")
	preserve_loaded_models_count = math.floor(
	(available_cpu_ram * 0.8) / total_memory) - 1
	if preserve_loaded_models_count > 1:
	ModelLRUCache = dynamic_import('.utils.model_lru_cache').ModelLRUCache
	print(
	f"Will keep {preserve_loaded_models_count} offloaded models in CPU RAM.")
	Global.loaded_models = ModelLRUCache(preserve_loaded_models_count)
	Global.loaded_tokenizers = LRUCache(preserve_loaded_models_count)

	except Exception as e:
	print(f"Notice: cannot get GPU info: {e}")

	print("")