# mistral-nanotron / run_evals.py
"""
Nanotron Inference Script
Usage:
```
export CUDA_DEVICE_MAX_CONNECTIONS=1 # important for some distributed operations
torchrun --nproc_per_node=8 run_evals.py --checkpoint-config-path ./pretrained/Mistral-7B-v0.1/config.yaml \
--lighteval-override ./lighteval_eval_config.yaml
```
"""
# flake8: noqa: C901
import argparse

from lighteval.main_nanotron import main
from nanotron.config import Config

from config_mistral import MistralConfig
from modeling_mistral import MistralForTraining


def get_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--checkpoint-config-path",
        type=str,
        required=True,
        help="Path to the checkpoint YAML or Python config file, potentially on S3",
    )
    parser.add_argument(
        "--lighteval-override",
        type=str,
        help="Path to an optional YAML or Python Lighteval config overriding part of the checkpoint's Lighteval config",
    )
    parser.add_argument(
        "--tokenizer",
        type=str,
        help="Local or Hub path of an optional tokenizer (if not set in the checkpoint config)",
    )
    parser.add_argument(
        "--cache-dir",
        type=str,
        default=None,
        help="Cache directory",
    )
    return parser


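# Example invocation (hypothetical local paths) exercising the optional flags
# defined above in addition to the required checkpoint config:
#
#   torchrun --nproc_per_node=8 run_evals.py \
#       --checkpoint-config-path ./pretrained/Mistral-7B-v0.1/config.yaml \
#       --lighteval-override ./lighteval_eval_config.yaml \
#       --tokenizer mistralai/Mistral-7B-v0.1 \
#       --cache-dir ./eval_cache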
if __name__ == "__main__":
    parser = get_parser()
    # Extra, unrecognized CLI arguments are collected into `unknowns` and ignored.
    args, unknowns = parser.parse_known_args()
    main(
        args.checkpoint_config_path,
        args.lighteval_override,
        args.cache_dir,
        config_cls=Config,
        model_config_cls=MistralConfig,
        model_cls=MistralForTraining,
    )