"""Format the JSON output of the data card tool as a markdown file."""

import argparse
import json
import pathlib
import os

# Key that holds a (sub)section's heading inside the question configuration.
# It sits next to the real entries and must be skipped when iterating them.
_SECTION_TITLE_KEY = "section-title"

parser = argparse.ArgumentParser(
    description="Format the output of the data card tool as .md for the hub."
)
parser.add_argument("--input_path", "-i", type=pathlib.Path, required=True)
parser.add_argument("--output_path", "-o", type=pathlib.Path, required=True)


def read_json_file(json_path: pathlib.Path):
    """Load a JSON file and return the decoded object.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file content.
    """
    # Read explicitly as UTF-8 instead of the platform default encoding.
    with open(json_path, "r", encoding="utf-8") as f:
        return json.load(f)


def save_md_file(md_path: pathlib.Path, md_string: str) -> None:
    """Write ``md_string`` to ``md_path``, replacing any existing file.

    Args:
        md_path: Destination path of the markdown file.
        md_string: Markdown content to write.
    """
    # Write explicitly as UTF-8 so non-ASCII answers do not depend on locale.
    with open(md_path, "w", encoding="utf-8") as f:
        f.write(md_string)


def _format_answer(question_key: str, reply) -> str:
    """Render a single answer, applying the per-question special cases."""
    if question_key == "paper-bibtext":
        return f"```\n{reply}\n``` \n\n"
    if question_key == "structure-example":
        return f"```json\n{reply}\n``` \n\n"
    if isinstance(reply, list):
        # str() keeps the join from failing on non-string list items.
        return f"{' '.join(str(item) for item in reply)} \n\n"
    return f"{reply} \n\n"


def construct_md(data_card_data: dict, text_by_key: dict) -> str:
    """Construct the markdown string for a data card.

    Iterates through ``text_by_key`` in order and looks up each answer in
    ``data_card_data``. Top-level entries become ``##`` headings, second-level
    entries become ``###`` headings and each answered question becomes a
    ``####`` heading followed by its answer. Questions whose answer is
    missing, falsy or ``"N/A"`` are omitted. Neither argument is modified.

    Args:
        data_card_data: Output from the data card tool, indexed as
            ``[section][subsection][question]``. Missing sections or
            subsections are treated as having no answers.
        text_by_key: Configuration defined in key_to_question.json. Every
            section and subsection must contain a ``'section-title'`` entry.

    Returns:
        The markdown-formatted content.

    Raises:
        KeyError: If a section or subsection of ``text_by_key`` has no
            ``'section-title'`` entry.
    """
    parts = []
    for main_key, main_content in text_by_key.items():
        parts.append(f"## {main_content[_SECTION_TITLE_KEY]} \n\n")
        main_answers = data_card_data.get(main_key) or {}

        for second_key, second_content in main_content.items():
            # Skip the title instead of deleting it, so the caller's
            # configuration stays intact and can be reused.
            if second_key == _SECTION_TITLE_KEY:
                continue
            parts.append(f"### {second_content[_SECTION_TITLE_KEY]} \n\n")
            answers = main_answers.get(second_key) or {}

            for question_key, question_text in second_content.items():
                if question_key == _SECTION_TITLE_KEY:
                    continue
                reply = answers.get(question_key, "N/A")
                if reply and reply != "N/A":
                    parts.append(f"#### {question_text} \n\n")
                    parts.append(_format_answer(question_key, reply))
    return "".join(parts)


def main() -> None:
    """Parse the command line, build the markdown and write it to disk."""
    # Parsing happens here rather than at import time so that importing this
    # module does not consume sys.argv or exit the process.
    args = parser.parse_args()
    data_card_data = read_json_file(args.input_path)
    text_by_key = read_json_file(
        os.path.join(os.path.dirname(__file__), "key_to_question.json")
    )
    data_card_md_string = construct_md(data_card_data, text_by_key)
    save_md_file(args.output_path, data_card_md_string)


if __name__ == "__main__":
    main()