|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
""" |
|
This script is used to get the files against which we will run doc testing. |
|
This uses `tests_fetcher.get_all_doctest_files` then groups the test files by their directory paths. |
|
|
|
The files in `docs/source/en/model_doc` or `docs/source/en/tasks` are **NOT** grouped together with other files in the |
|
same directory: the objective is to run doctest against them in independent GitHub Actions jobs. |
|
|
|
Assume we are under `transformers` root directory: |
|
To get a map (dictionary) between directory (or file) paths and the corresponding files |
|
```bash |
|
python utils/split_doctest_jobs.py |
|
``` |
|
or to get a list of lists of directory (or file) paths |
|
```bash |
|
python utils/split_doctest_jobs.py --only_return_keys --num_splits 4 |
|
``` |
|
(this is used to allow GitHub Actions to generate more than 256 jobs using matrix) |
|
""" |
|
|
|
import argparse |
|
from collections import defaultdict |
|
from pathlib import Path |
|
|
|
from tests_fetcher import get_all_doctest_files |
|
|
|
|
|
def get_test_collection_map(doctest_files):
    """Group doctest files into jobs keyed by directory, or by file for the per-file directories.

    Args:
        doctest_files: iterable of file paths (strings) against which doctests will be run.

    Returns:
        A dict whose keys are inserted in sorted order. For most files the key is the file's parent directory
        and the value is a single string containing the sorted files of that directory separated by spaces.
        Files located directly in `docs/source/en/model_doc` or `docs/source/en/tasks` are not grouped: each of
        them gets its own entry whose key and value are both the file path.
    """
    files_by_dir = defaultdict(list)
    for file in doctest_files:
        # Re-join the parent's parts with "/" so that the keys are plain forward-slash strings.
        file_dir = "/".join(Path(file).parents[0].parts)
        files_by_dir[file_dir].append(file)

    collection = {}
    for file_dir, files in files_by_dir.items():
        if file_dir in ("docs/source/en/model_doc", "docs/source/en/tasks"):
            # One entry per file, so each of these files can be run in its own job.
            for file in files:
                collection[file] = file
        else:
            collection[file_dir] = " ".join(sorted(files))

    # Rebuild the dict so that iterating over it (and printing it) follows the sorted key order.
    return {key: collection[key] for key in sorted(collection)}


def split_into_chunks(items, num_splits):
    """Split `items` into `num_splits` contiguous chunks whose sizes differ by at most one.

    The first `len(items) % num_splits` chunks receive one extra element. Chunks may be empty when there are
    fewer items than splits.

    Args:
        items: a list (or any sliceable sequence) to split.
        num_splits: the number of chunks to produce; must be at least 1.

    Returns:
        A list containing exactly `num_splits` slices of `items`.

    Raises:
        ValueError: if `num_splits` is smaller than 1.
    """
    if num_splits < 1:
        raise ValueError(f"`num_splits` must be a positive integer, got {num_splits}.")

    base_size, remainder = divmod(len(items), num_splits)

    chunks = []
    end = 0
    for idx in range(num_splits):
        start = end
        end = start + base_size + (1 if idx < remainder else 0)
        chunks.append(items[start:end])
    return chunks


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--only_return_keys",
        action="store_true",
        help="if to only return the keys (which is a list of list of files' directory or file paths).",
    )
    parser.add_argument(
        "--num_splits",
        type=int,
        default=1,
        help="the number of splits into which the (flat) list of directory/file paths will be split. This has effect only if `only_return_keys` is `True`.",
    )
    args = parser.parse_args()

    # `--num_splits` is only consumed together with `--only_return_keys`. Reject unusable values up front with a
    # regular usage error instead of crashing with a `ZeroDivisionError` (0) or printing an empty list (negative).
    if args.only_return_keys and args.num_splits < 1:
        parser.error("--num_splits must be a positive integer")

    test_collection_map = get_test_collection_map(get_all_doctest_files())

    if args.only_return_keys:
        # A list of lists of keys: one inner list per split.
        print(split_into_chunks(list(test_collection_map), args.num_splits))
    else:
        print(test_collection_map)
|
|