# Copyright (c) OpenMMLab. All rights reserved. import argparse import importlib import os import os.path as osp import pkgutil import sys import tempfile from multiprocessing import Pool from pathlib import Path import numpy as np import pandas as pd # host_addr = 'https://gitee.com/open-mmlab' host_addr = 'https://github.com/open-mmlab' tools_list = ['tools', '.dev_scripts'] proxy_names = { 'mmdet': 'mmdetection', 'mmseg': 'mmsegmentation', 'mmcls': 'mmclassification' } merge_module_keys = {'mmcv': ['mmengine']} # exclude_prefix = {'mmcv': ['{_k}') table_data.append((registry_name, registry_strings)) # sort the data list table_data = sorted(table_data, key=lambda x: len(x[1])) # split multi parts table_data_multi_parts = [] for (registry_name, registry_strings) in table_data: multi_parts = False if len(registry_strings) > max_size_per_cell: multi_parts = True for cell_idx, registry_cell in enumerate( divide_list_into_groups(registry_strings, max_size_per_cell)): registry_str = ''.join(registry_cell.tolist()) registry_str = f'' table_data_multi_parts.append([ registry_name if not multi_parts else f'{registry_name} (part {cell_idx + 1})', registry_str ]) for table_data in divide_list_into_groups(table_data_multi_parts, max_col_per_row): table_data = list(zip(*table_data.tolist())) html += dataframe_to_html( pd.DataFrame([table_data[1]], columns=table_data[0])) if html: html = f'
{title}
\n{html}' html = f'
{html}
def tools_to_html(tools_dict, repo_name=''):
    """Render a nested tools tree as an HTML section.

    The nested ``tools_dict`` is flattened into a mapping from a joined
    directory path to the list of leaf names found directly under it, and
    that mapping is handed to ``registries_to_html`` together with a title
    built from ``repo_name``.

    Args:
        tools_dict (dict): Nested mapping. A value of ``None`` marks a leaf
            entry; any other value must itself be a dict and is descended
            into.
        repo_name (str): Repository name used to build the section title.

    Returns:
        The value returned by ``registries_to_html`` for the flattened
        mapping.
    """

    def _flatten(subtree, prefix, grouped):
        # Only dictionaries can be descended into.
        assert isinstance(subtree, dict), \
            f'unknown recurse type: {subtree} ({type(subtree)})'
        for name, child in subtree.items():
            if child is not None:
                # Nested level: extend the path prefix and keep walking.
                _flatten(child, osp.join(prefix, name), grouped)
                continue
            # Leaf: file it under the path accumulated so far.
            grouped.setdefault(prefix, []).append(name)

    grouped_tools = {}
    section_title = f'{capitalize(repo_name)} Tools'
    _flatten(tools_dict, '', grouped_tools)
    return registries_to_html(grouped_tools, section_title)
\n{html}
def generate_markdown_by_repository(repo_name,
                                    module_name,
                                    branch,
                                    pulldir,
                                    throw_error=False):
    """Build the markdown/HTML report section for one checked-out repository.

    Args:
        repo_name (str): Repository name, used in the section titles.
        module_name (str): Name of the python package to inspect; also the
            key used to look up its registries, merge keys and exclude
            prefixes.
        branch: Branch identifier forwarded to
            ``get_version_from_module_name``.
        pulldir (str): Directory containing the checked-out repository. It
            is put on ``sys.path`` while the modules are loaded.
        throw_error (bool): Forwarded to ``load_modules_from_dir``.

    Returns:
        str: A ``## <repo><version>`` heading followed by the module
        components section and the tools section.
    """
    # add the pull dir to the system path so that it can be found
    if pulldir not in sys.path:
        sys.path.insert(0, pulldir)

    try:
        module_list, error_dict = load_modules_from_dir(
            module_name, pulldir, throw_error=throw_error)
        registries_tree = get_registries_from_modules(module_list)

        if error_dict:
            error_dict_name = 'error_modules'
            assert (error_dict_name not in registries_tree), \
                f'duplicate module name was found: {error_dict_name}'
            registries_tree[error_dict_name] = error_dict

        # get the tools files
        for tools_name in tools_list:
            assert (tools_name not in registries_tree), \
                f'duplicate tools name was found: {tools_name}'
            registries_tree[tools_name] = get_scripts_from_dir(
                osp.join(pulldir, tools_name))

        # get registries markdown string
        module_registries = registries_tree.get(module_name, {})
        for merge_key in merge_module_keys.get(module_name, []):
            merge_registries(module_registries,
                             registries_tree.get(merge_key, {}))
        for exclude_key in exclude_prefix.get(module_name, []):
            exclude_registries(module_registries, exclude_key)
        markdown_str = registries_to_html(
            module_registries,
            title=f'{capitalize(repo_name)} Module Components')

        # get tools markdown string
        tools_registries = {
            tools_name: registries_tree.get(tools_name, {})
            for tools_name in tools_list
        }
        markdown_str += tools_to_html(tools_registries, repo_name=repo_name)

        version_str = get_version_from_module_name(module_name, branch)
        title_str = f'\n\n## {capitalize(repo_name)}{version_str}\n'
    finally:
        # remove the pull dir from system path, even when loading failed
        if pulldir in sys.path:
            sys.path.remove(pulldir)

    return f'{title_str}{markdown_str}'


def parse_args():
    """Parse and validate the command line arguments.

    When ``--branches`` is omitted, each repository is paired with its
    known default branch by *name*, so any subset or ordering of the
    default repositories works. Repositories without a known default
    require an explicit ``--branches``.

    Returns:
        argparse.Namespace: Has ``repositories`` and ``branches`` (lists of
        equal length), ``out`` (str) and ``throw_error`` (bool).
    """
    # Default branch of each repository in the default ``--repositories``.
    default_branches = {
        'mmdet': '3.x',
        'mmcls': '1.x',
        'mmseg': '1.x',
        'mmengine': 'main',
        'mmcv': '2.x',
    }

    parser = argparse.ArgumentParser(
        description='print registries in openmmlab repositories')
    parser.add_argument(
        '-r',
        '--repositories',
        nargs='+',
        default=['mmdet', 'mmcls', 'mmseg', 'mmengine', 'mmcv'],
        type=str,
        help='git repositories name in OpenMMLab')
    parser.add_argument(
        '-b',
        '--branches',
        nargs='+',
        default=None,
        type=str,
        help='the branch names of git repositories, the length of branches '
        'must be same as the length of repositories; when omitted, the '
        'known default branch of each repository is used')
    parser.add_argument(
        '-o', '--out', type=str, default='.', help='output path of the file')
    parser.add_argument(
        '--throw-error',
        action='store_true',
        default=False,
        help='whether to throw error when trying to import modules')
    args = parser.parse_args()

    if args.branches is None:
        unknown = [
            name for name in args.repositories
            if name not in default_branches
        ]
        if unknown:
            parser.error('no default branch is known for '
                         f'{", ".join(unknown)}; please pass --branches')
        args.branches = [default_branches[name] for name in args.repositories]
    elif len(args.branches) != len(args.repositories):
        parser.error('The length of branches must be same as '
                     'that of repositories')
    return args


# TODO: Refine
def main():
    """Pull every requested repository and write ``registries_info.md``.

    Each repository is checked out into a temporary directory, the report
    sections are generated in one worker process per repository, and the
    sections are written to ``<out>/registries_info.md`` in the order the
    repositories were given.
    """
    args = parse_args()
    repositories = args.repositories
    branches = args.branches

    # save path of file
    mkdir_or_exist(args.out)
    save_path = osp.join(args.out, 'registries_info.md')

    with tempfile.TemporaryDirectory() as tmpdir:
        # get the git repositories
        worker_inputs = []
        for branch, repository in zip(branches, repositories):
            repo_name, module_name = parse_repo_name(repository)
            pulldir = osp.join(tmpdir, f'tmp_{repo_name}')
            git_pull_branch(
                repo_name=repo_name, branch_name=branch, pulldir=pulldir)
            worker_inputs.append(
                (repo_name, module_name, branch, pulldir, args.throw_error))

        print('starting the multi process to get the registries')
        # The pool is created only after all checkouts succeeded, and the
        # context manager tears the workers down if anything below raises.
        with Pool(processes=len(worker_inputs)) as pool:
            pending = [
                pool.apply_async(generate_markdown_by_repository,
                                 worker_input)
                for worker_input in worker_inputs
            ]
            pool.close()
            pool.join()
            # Collect every result before touching the output file so a
            # failing worker cannot leave a truncated report behind.
            sections = [result.get() for result in pending]

        with open(save_path, 'w', encoding='utf-8') as fw:
            fw.write(f'{markdown_title}\n')
            for markdown_str in sections:
                fw.write(f'{markdown_str}\n')
    print(f'saved registries to the path: {save_path}')


if __name__ == '__main__':
    main()