File size: 5,336 Bytes
8273cb9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import argparse
import os

from omegaconf import OmegaConf

from mmpt.utils import recursive_config, overwrite_dir
from mmpt_cli.localjob import LocalJob


class JobLauncher(object):
    JOB_CONFIG = {
        "local": LocalJob,
    }

    def __init__(self, yaml_file):
        self.yaml_file = yaml_file
        job_key = "local"

        if yaml_file.endswith(".yaml"):
            config = recursive_config(yaml_file)
            if config.task_type is not None:
                job_key = config.task_type.split("_")[0]
        else:
            raise ValueError("unknown extension of job file:", yaml_file)
        self.job_key = job_key

    def __call__(self, job_type=None, dryrun=False):
        if job_type is not None:
            self.job_key = job_type.split("_")[0]
        print("[JobLauncher] job_key", self.job_key)
        job = JobLauncher.JOB_CONFIG[self.job_key](
            self.yaml_file, job_type=job_type, dryrun=dryrun)
        return job.submit()


class Pipeline(object):
    """a job that loads yaml config."""

    def __init__(self, fn):
        """
        load a yaml config of a job and save generated configs as yaml for each task.
        return: a list of files to run as specified by `run_task`.
        """
        if fn.endswith(".py"):
            # a python command.
            self.backend = "python"
            self.run_yamls = [fn]
            return

        job_config = recursive_config(fn)
        if job_config.base_dir is None:  # single file job config.
            self.run_yamls = [fn]
            return

        self.project_dir = os.path.join("projects", job_config.project_dir)
        self.run_dir = os.path.join("runs", job_config.project_dir)

        if job_config.run_task is not None:
            run_yamls = []
            for stage in job_config.run_task:
                # each stage can have multiple tasks running in parallel.
                if OmegaConf.is_list(stage):
                    stage_yamls = []
                    for task_file in stage:
                        stage_yamls.append(
                            os.path.join(self.project_dir, task_file))
                    run_yamls.append(stage_yamls)
                else:
                    run_yamls.append(os.path.join(self.project_dir, stage))
            self.run_yamls = run_yamls
        configs_to_save = self._overwrite_task(job_config)
        self._save_configs(configs_to_save)

    def __getitem__(self, idx):
        yaml_files = self.run_yamls[idx]
        if isinstance(yaml_files, list):
            return [JobLauncher(yaml_file) for yaml_file in yaml_files]
        return [JobLauncher(yaml_files)]

    def __len__(self):
        return len(self.run_yamls)

    def _save_configs(self, configs_to_save: dict):
        # save
        os.makedirs(self.project_dir, exist_ok=True)
        for config_file in configs_to_save:
            config = configs_to_save[config_file]
            print("saving", config_file)
            OmegaConf.save(config=config, f=config_file)

    def _overwrite_task(self, job_config):
        configs_to_save = {}
        self.base_project_dir = os.path.join("projects", job_config.base_dir)
        self.base_run_dir = os.path.join("runs", job_config.base_dir)

        for config_sets in job_config.task_group:
            overwrite_config = job_config.task_group[config_sets]
            if (
                overwrite_config.task_list is None
                or len(overwrite_config.task_list) == 0
            ):
                print(
                    "[warning]",
                    job_config.task_group,
                    "has no task_list specified.")
            # we don't want this added to a final config.
            task_list = overwrite_config.pop("task_list", None)
            for config_file in task_list:
                config_file_path = os.path.join(
                    self.base_project_dir, config_file)
                config = recursive_config(config_file_path)
                # overwrite it.
                if overwrite_config:
                    config = OmegaConf.merge(config, overwrite_config)
                overwrite_dir(config, self.run_dir, basedir=self.base_run_dir)
                save_file_path = os.path.join(self.project_dir, config_file)
                configs_to_save[save_file_path] = config
        return configs_to_save


def main(args):
    job_type = args.jobtype if args.jobtype else None
    # parse multiple pipelines.
    pipelines = [Pipeline(fn) for fn in args.yamls.split(",")]

    for pipe_id, pipeline in enumerate(pipelines):
        if not hasattr(pipeline, "project_dir"):
            for job in pipeline[0]:
                job(job_type=job_type, dryrun=args.dryrun)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("yamls", type=str)
    parser.add_argument(
        "--dryrun",
        action="store_true",
        help="run config and prepare to submit without launch the job.",
    )
    parser.add_argument(
        "--jobtype", type=str, default="",
        help="force to run jobs as specified.")
    args = parser.parse_args()
    main(args)