File size: 1,584 Bytes
f1d3dc6 aa5ff8c f1d3dc6 aa5ff8c f1d3dc6 aa5ff8c f1d3dc6 aa5ff8c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
"""
Nanotron Inference Script
Usage:
```
export CUDA_DEVICE_MAX_CONNECTIONS=1 # important for some distributed operations
torchrun --nproc_per_node=8 run_evals.py --checkpoint-config-path ./pretrained/Mistral-7B-v0.1/config.yaml \
--lighteval-override ./lighteval_eval_config.yaml
```
"""
# flake8: noqa: C901
import argparse
from nanotron.config import Config
from modeling_mistral import MistralForTraining
from config_mistral import MistralConfig
from lighteval.main_nanotron import main
def get_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the nanotron/lighteval eval entry point.

    Returns:
        argparse.ArgumentParser: parser exposing --checkpoint-config-path
        (required), --lighteval-override, --tokenizer, and --cache-dir.
    """
    arg_parser = argparse.ArgumentParser()
    # Required: location of the checkpoint's config (local path or S3).
    arg_parser.add_argument(
        "--checkpoint-config-path",
        type=str,
        required=True,
        help="Path to the brr checkpoint YAML or python config file, potentially on S3",
    )
    # Optional override for the lighteval section of the checkpoint config.
    arg_parser.add_argument(
        "--lighteval-override",
        type=str,
        help="Path to an optional YAML or python Lighteval config to override part of the checkpoint Lighteval config",
    )
    # Optional tokenizer when the checkpoint does not carry one.
    arg_parser.add_argument(
        "--tokenizer",
        type=str,
        help="Local or hub path of an optional tokenizer (if not indicated in the checkpoint)",
    )
    arg_parser.add_argument(
        "--cache-dir",
        type=str,
        default=None,
        help="Cache directory",
    )
    return arg_parser
if __name__ == "__main__":
parser = get_parser()
args, unknowns = parser.parse_known_args()
main(
args.checkpoint_config_path,
args.lighteval_override,
args.cache_dir,
config_cls=Config,
model_config_cls=MistralConfig,
model_cls=MistralForTraining
)
|