# mistral-nanotron / run_evals.py
"""
Nanotron Inference Script
Usage:
```
export CUDA_DEVICE_MAX_CONNECTIONS=1 # important for some distributed operations
torchrun --nproc_per_node=8 run_evals.py --checkpoint-config-path ./pretrained/Mistral-7B-v0.1/config.yaml \
--lighteval-override ./lighteval_eval_config.yaml
```
"""
# flake8: noqa: C901
import argparse

from lighteval.main_nanotron import main
from nanotron.config import Config

from config_mistral import MistralConfig
from modeling_mistral import MistralForTraining


def get_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--checkpoint-config-path",
        type=str,
        required=True,
        help="Path to the checkpoint YAML or Python config file, potentially on S3",
    )
    parser.add_argument(
        "--lighteval-override",
        type=str,
        help="Path to an optional YAML or Python Lighteval config overriding part of the checkpoint's Lighteval config",
    )
    parser.add_argument(
        "--tokenizer",
        type=str,
        help="Local or Hub path of an optional tokenizer (if not set in the checkpoint config)",
    )
    parser.add_argument(
        "--cache-dir",
        type=str,
        default=None,
        help="Cache directory",
    )
    return parser


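# Example invocation (hypothetical local paths) exercising the optional flags
# defined above in addition to the required checkpoint config:
#
#   torchrun --nproc_per_node=8 run_evals.py \
#       --checkpoint-config-path ./pretrained/Mistral-7B-v0.1/config.yaml \
#       --lighteval-override ./lighteval_eval_config.yaml \
#       --tokenizer mistralai/Mistral-7B-v0.1 \
#       --cache-dir ./eval_cache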
if __name__ == "__main__":
    parser = get_parser()
    # Extra, unrecognized CLI arguments are collected into `unknowns` and ignored.
    args, unknowns = parser.parse_known_args()
    main(
        args.checkpoint_config_path,
        args.lighteval_override,
        args.cache_dir,
        config_cls=Config,
        model_config_cls=MistralConfig,
        model_cls=MistralForTraining,
    )