"""
Nanotron Inference Script

Usage:
```
export CUDA_DEVICE_MAX_CONNECTIONS=1 # important for some distributed operations
torchrun --nproc_per_node=8 run_evals.py --checkpoint-config-path ./pretrained/Mistral-7B-v0.1/config.yaml \
    --lighteval-override ./lighteval_eval_config.yaml
```
"""
# flake8: noqa: C901
import argparse

from lighteval.main_nanotron import main
from nanotron.config import Config

from config_mistral import MistralConfig
from modeling_mistral import MistralForTraining


def get_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--checkpoint-config-path",
        type=str,
        required=True,
        help="Path to the brr checkpoint YAML or python config file, potentially on S3",
    )
    parser.add_argument(
        "--lighteval-override",
        type=str,
        help="Path to an optional YAML or python Lighteval config to override part of the checkpoint Lighteval config",
    )
    parser.add_argument(
        "--tokenizer",
        type=str,
        help="Local or hub path of an optional tokenizer (if not indicated in the checkpoint)",
    )
    parser.add_argument(
        "--cache-dir",
        type=str,
        default=None,
        help="Cache directory",
    )

    return parser


if __name__ == "__main__":
    parser = get_parser()
    args, unknowns = parser.parse_known_args()
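    # parse_known_args lets launcher-injected or extra flags pass through without
    # erroring; only the arguments defined in get_parser() are consumed here.
    # Hand the checkpoint config, optional Lighteval override, and cache dir to
    # lighteval's nanotron entry point, together with the nanotron Config class
    # and the local Mistral config/model classes it should instantiate.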
    main(
        args.checkpoint_config_path,
        args.lighteval_override,
        args.cache_dir,
        config_cls=Config,
        model_config_cls=MistralConfig,
        model_cls=MistralForTraining
    )
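

# Only --checkpoint-config-path is required; --lighteval-override, --tokenizer and
# --cache-dir are optional. A single-GPU sanity-check launch might look like this
# (a sketch: adjust --nproc_per_node and the checkpoint path to your setup):
#
#   export CUDA_DEVICE_MAX_CONNECTIONS=1
#   torchrun --nproc_per_node=1 run_evals.py \
#       --checkpoint-config-path ./pretrained/Mistral-7B-v0.1/config.yaml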