"""
Nanotron Inference Script

Usage:

```
export CUDA_DEVICE_MAX_CONNECTIONS=1  # important for some distributed operations
torchrun --nproc_per_node=8 run_evals.py --checkpoint-config-path ./pretrained/Mistral-7B-v0.1/config.yaml \
    --lighteval-override ./lighteval_eval_config.yaml
```
"""
|
|
|
import argparse |
|
|
|
from nanotron.config import Config |
|
from modeling_mistral import MistralForTraining |
|
from config_mistral import MistralConfig |
|
|
|
from lighteval.main_nanotron import main |
|
|
|
def get_parser():
    """Build and return the argument parser for this evaluation entry point.

    All options are plain string flags; only ``--checkpoint-config-path``
    is mandatory. Unknown extra flags are tolerated by the caller via
    ``parse_known_args``.
    """
    parser = argparse.ArgumentParser()

    # (flag, extra add_argument kwargs) — every option is type=str.
    option_specs = [
        (
            "--checkpoint-config-path",
            dict(
                required=True,
                help="Path to the brr checkpoint YAML or python config file, potentially on S3",
            ),
        ),
        (
            "--lighteval-override",
            dict(
                help="Path to an optional YAML or python Lighteval config to override part of the checkpoint Lighteval config",
            ),
        ),
        (
            "--tokenizer",
            dict(
                help="Local or hub path of an optional tokenizer (if not indicated in the checkpoint)",
            ),
        ),
        (
            "--cache-dir",
            dict(
                default=None,
                help="Cache directory",
            ),
        ),
    ]

    for flag, extra in option_specs:
        parser.add_argument(flag, type=str, **extra)

    return parser
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Accept (and ignore) unrecognized flags so launcher-injected extras
    # (e.g. from torchrun) do not abort the run.
    cli_args, _unknown = get_parser().parse_known_args()

    # Hand off to lighteval's nanotron entry point, wiring in the Mistral
    # config/model classes so the checkpoint is deserialized correctly.
    main(
        cli_args.checkpoint_config_path,
        cli_args.lighteval_override,
        cli_args.cache_dir,
        config_cls=Config,
        model_config_cls=MistralConfig,
        model_cls=MistralForTraining,
    )
|
|