import argparse
import json
import pathlib
import re

# Heading line that starts an existing table of contents, e.g. "#### Table of contents:".
TABLE_OF_CONTENT = r"#+\s+Table of content:?"

# A generated table-of-contents entry: an optionally indented "- [title](#link)" item.
_TC_LIST_ITEM = re.compile(r"\s*-\s*\[.*\]\(#.*\).*")


def find_tc_in_cell(cell):
    """Locate the table-of-contents heading inside a notebook cell.

    Args:
        cell: Notebook cell dict; its ``"source"`` entry is iterated line by line.

    Returns:
        ``(cell, line_index)`` for the first source line matching
        ``TABLE_OF_CONTENT``, or ``(None, None)`` when no line matches.
    """
    for line_number, line in enumerate(cell["source"]):
        if re.match(TABLE_OF_CONTENT, line):
            return cell, line_number
    return None, None


def create_title_for_tc(title):
    """Return the display text of a heading for use in the table of contents.

    Leading ``#`` markers and whitespace are dropped, square brackets and
    newlines are removed, and any ``(http...)`` link target is cut out.
    """
    title_for_tc = title.lstrip("#").lstrip()
    title_for_tc = re.sub(r"[\[\]\n]", "", title_for_tc)
    title_for_tc = re.sub(r"\(http.*\)", "", title_for_tc)
    return title_for_tc


def create_link_for_tc(title):
    """Build the anchor for a heading: drop `` ` ``, ``$``, ``^`` and turn spaces into dashes."""
    link = re.sub(r"[`$^]", "", title)
    return link.replace(" ", "-")


def remove_old_tc(cell, idx):
    """Strip a previously generated table of contents from a cell, in place.

    Only lines at position ``idx`` or later are considered; the table heading
    and every ``- [title](#link)`` entry among them are removed. Lines before
    ``idx`` are never touched, even when they are textually identical to a
    removed entry.

    Args:
        cell: Cell dict holding the old table of contents, or ``None``.
        idx: Index of the table-of-contents heading within ``cell["source"]``.

    Returns:
        The same ``cell`` object (``None`` is passed through unchanged).
    """
    if cell is not None:
        source = cell["source"]
        # Slice assignment keeps the list object callers already hold and removes
        # lines by position, unlike list.remove() which deletes the first equal
        # line anywhere in the cell.
        source[idx:] = [
            line
            for line in source[idx:]
            if not (_TC_LIST_ITEM.match(line) or re.match(TABLE_OF_CONTENT, line))
        ]
    return cell


def get_tc_line(title, title_for_tc, link, tc_list, titles_list):
    """Format one table-of-contents entry with an indent derived from the heading level.

    Args:
        title: Stripped heading line, e.g. ``"### Section"``.
        title_for_tc: Display text for the entry.
        link: Anchor the entry points to (without the leading ``#``).
        tc_list: Entries generated so far; the last one supplies the previous indent.
        titles_list: Stripped heading lines that produced ``tc_list``.

    Returns:
        The entry as ``"<indent>- [title_for_tc](#link)\\n"``.
    """
    # Position of the first space == number of leading "#" for a well-formed
    # heading; -1 when the heading has no space at all (e.g. "#Title").
    marker_len = title.find(" ")
    indents_num = (marker_len - 2) * 4 if marker_len >= 0 else -1

    if not tc_list or indents_num < 0:
        # The first entry (or a top-level / malformed heading) must start at
        # column 0, otherwise the rendered list alignment is broken.
        indents_num = 0
    else:
        previous_indent = tc_list[-1].index("-")
        if titles_list and titles_list[-1].find(" ") == marker_len:
            # Same heading level as the previous title: reuse its (possibly
            # corrected) indent so sibling headings stay aligned.
            indents_num = previous_indent
        elif indents_num - previous_indent > 4:
            # A heading that skips levels may only be nested one step deeper
            # than the previous entry.
            indents_num = previous_indent + 4

    indents = " " * indents_num + "-" + " "
    return f"{indents}[{title_for_tc}](#{link})\n"


def is_ref_to_top_exists(cell, idx):
    """Tell whether a "back to top" link directly follows the line at ``idx``.

    Args:
        cell: List of source lines (the ``"source"`` list of a cell).
        idx: Index of the heading line to look after.

    Returns:
        ``True`` if the first non-blank line after ``idx`` contains the
        back-to-top link, ``False`` otherwise.
    """
    for row in cell[idx + 1 :]:
        row = row.strip()
        if "[back to top ⬆️](#Table-of-content" in row:
            return True
        if row:
            # Real content of the section started before any link was seen.
            return False
    return False


def is_markdown(cell):
    """Return ``True`` when the cell's ``"cell_type"`` is ``"markdown"``."""
    return cell["cell_type"] == "markdown"


def is_title(line):
    """Return ``True`` when the line starts with ``#`` and has text after the markers."""
    stripped = line.strip()
    return stripped.startswith("#") and bool(stripped.lstrip("#").lstrip())
# Link placed under every heading; must keep matching the prefix that
# is_ref_to_top_exists() searches for.
_BACK_TO_TOP_LINK = "[back to top ⬆️](#Table-of-contents:)\n"


def _add_back_to_top_links(source):
    """Insert a back-to-top link under every heading of ``source`` that lacks one.

    ``source`` (the list of lines of one markdown cell) is modified in place.
    Returns the heading lines in document order, as they were before editing.
    """
    titles = []
    position = 0
    while position < len(source):
        line = source[position]
        if is_title(line):
            titles.append(line)
            if not is_ref_to_top_exists(source, position):
                if not line.endswith("\n"):
                    # Terminate the heading in place; inserting a second copy
                    # would duplicate the heading on every run.
                    source[position] = line + "\n"
                source[position + 1 : position + 1] = [_BACK_TO_TOP_LINK, ""]
                # Skip over the two lines just inserted.
                position += 2
        position += 1
    return titles


def generate_table_of_content(notebook_path: pathlib.Path):
    """Create or refresh the table of contents of a single notebook file.

    The notebook JSON is read from ``notebook_path``. Every markdown cell after
    the first one is scanned for headings, except the cell that already holds a
    table of contents. Each heading gets a back-to-top link (if missing) and an
    entry in the table. The old table, if any, is removed from its cell and the
    new one is appended to the end of that cell; when the notebook has no
    table yet, it is appended to the first cell. The file is then rewritten.

    A notebook with an empty ``"cells"`` list is left untouched.

    Args:
        notebook_path: Path of the ``.ipynb`` file to update in place.
    """
    with open(notebook_path, "r", encoding="utf-8") as notebook_file:
        notebook_json = json.load(notebook_file)

    cells = notebook_json["cells"]
    if not cells:
        return

    toc_cell, toc_line_idx = find_tc_in_cell(cells[0])
    toc_entries = []
    all_titles = []
    seen_titles = set()

    for cell in filter(is_markdown, cells[1:]):
        if toc_cell is None:
            toc_cell, toc_line_idx = find_tc_in_cell(cell)
            if toc_cell is not None:
                # The cell holding the table itself contributes no entries.
                continue

        for raw_title in _add_back_to_top_links(cell["source"]):
            title = raw_title.strip()
            title_for_tc = create_title_for_tc(title)
            link_for_tc = create_link_for_tc(title_for_tc)
            new_line = get_tc_line(title, title_for_tc, link_for_tc, toc_entries, all_titles)

            # Warn as soon as a heading text repeats: both entries share one anchor.
            if title_for_tc in seen_titles:
                print(
                    f'WARNING: the title "{title_for_tc}" has already been used in titles.\n'
                    + "Navigation will work incorrectly, the link will only point to "
                    + "the first encountered title"
                )
            seen_titles.add(title_for_tc)

            toc_entries.append(new_line)
            all_titles.append(title)

    table_of_content = ["\n", "#### Table of contents:\n\n"] + toc_entries + ["\n"]

    if toc_cell is not None:
        target_cell = remove_old_tc(toc_cell, toc_line_idx)
    else:
        target_cell = cells[0]
    target_cell["source"].extend(table_of_content)

    with open(notebook_path, "w", encoding="utf-8") as out_file:
        json.dump(notebook_json, out_file, ensure_ascii=False, indent=1)


def main():
    """Parse the command line and update one notebook or every notebook in a folder."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-s",
        "--source",
        help="Please, specify notebook or folder with notebooks. "
        "Table of content will be added or modified in each.",
        required=True,
    )
    args = parser.parse_args()
    path_to_source = pathlib.Path(args.source)

    if not path_to_source.exists():
        # SystemExit with a message prints it to stderr and exits with status 1,
        # so callers (CI, shell scripts) can detect the failure.
        raise SystemExit(f"Incorrect path to notebook(s) {path_to_source}")

    if path_to_source.is_file():
        generate_table_of_content(path_to_source)
    elif path_to_source.is_dir():
        for notebook in path_to_source.glob("**/*.ipynb"):
            generate_table_of_content(notebook)


if __name__ == "__main__":
    main()