"""Convert GEMv2 data-card JSON files into Markdown.

NOTE(review): this file reached review with all line breaks removed and with
part of its middle section missing (see the note further down). The layout
below is restored from syntax; the missing definitions are NOT reconstructed.
"""
from argparse import ArgumentParser
from json import load
import pathlib
import os


def _nested_get(d, keys, default):
    """Walk `d` through `keys` and return the value found, else `default`.

    A level that is not a dict (for example a JSON ``null``) is treated the
    same as a missing key instead of raising ``AttributeError``.
    """
    current = d
    for key in keys:
        if not isinstance(current, dict) or key not in current:
            return default
        current = current[key]
    return current


def multi_grep(d, l1, l2, l3):
    """Return ``d[l1][l2][l3]``, or "[Needs More Information]" if absent.

    Args:
        d: Nested dictionary to search.
        l1: Key at the first level.
        l2: Key at the second level.
        l3: Key at the third level.

    Returns:
        The stored value (whatever its type), or the placeholder string when
        any of the three levels is missing or is not a dictionary.
    """
    return _nested_get(d, (l1, l2, l3), "[Needs More Information]")


def multi_grep2(d, l1, l2, l3):
    """Return ``d[l1][l2][l3]``, or ``["unknown"]`` if absent.

    Same lookup as `multi_grep`, with a list-valued fallback. The fallback
    list is created anew on every call, so callers may mutate it safely.
    """
    return _nested_get(d, (l1, l2, l3), ["unknown"])


def sanitize_md_url(s):
    """Strip out MD fragments if they exist.

    When `s` contains a Markdown inline link (``[text](target)``), return
    only the target of the first link; otherwise return `s` unchanged.

    Parentheses that belong to the target itself are kept as long as they
    are balanced, so ``[x](https://host/Foo_(bar))`` yields
    ``https://host/Foo_(bar)``. Text that follows the link is dropped.
    If the closing parenthesis is missing, everything after ``](`` is
    returned.
    """
    _, marker, target = s.partition("](")
    if not marker:
        return s

    depth = 0
    for index, char in enumerate(target):
        if char == "(":
            depth += 1
        elif char == ")":
            if depth == 0:
                # This parenthesis closes the Markdown link itself.
                return target[:index]
            depth -= 1
    # No closing parenthesis for the link: keep what is there.
    return target


# ---
# annotations_creators:
# - expert-generated
# language_creators:
# - found
# languages:
# - en
# licenses:
# - unknown
# multilinguality:
# - monolingual
# pretty_name: FairytaleQA
# size_categories:
# - 10K

# NOTE(review): the source text is damaged from this point. Everything
# between the YAML example above and the fragment below is missing,
# including the definition of `json_to_markdown` (called in the
# `__main__` block) and the beginning of the function that the fragment
# belongs to. The f-string contents in the fragment also appear to have been
# stripped. The surviving text is kept verbatim here and must be restored
# from version control before this script can run:
#
#   \n' if field.get('info', False): markdown += f'\n'
#   if field.get('scope', False): markdown += f'\n'
#   markdown += field.get('content', '')
#   return markdown + '\n'


# def main():
#     """Converts JSON output from `reformat_json.py`
#     to Markdown input for Data Cards Labs."""
#     args = parse_args()
#     for filename in args.input:
#         if filename[-5:] == '.json':
#             json_to_markdown(filename)


if __name__ == "__main__":
    # Each entry of the GEMv2 directory is treated as a dataset name; a
    # dataset has a data card when `<dataset>/<dataset>.json` exists there.
    for dataset in os.listdir("../../../GEMv2"):
        data_card_path = f"../../../GEMv2/{dataset}/{dataset}.json"
        if os.path.exists(data_card_path):
            print(f"Now processing {dataset}.")
            # This script assumes you have run reformat_json.py
            new_path = f"datacards/{dataset}.json"
            md_string = json_to_markdown(new_path, data_card_path)
        else:
            print(f"{dataset} has no data card!")