Sebastian Gehrmann committed on
Commit
8a3a160
1 Parent(s): 9999db9

add md formatting

Browse files
Files changed (2) hide show
  1. datacards/context.py +2 -2
  2. formatting/format_as_md.py +77 -0
datacards/context.py CHANGED
@@ -33,7 +33,7 @@ def context_page():
33
  if "yes" in st.session_state.card_dict["context"]["previous"]["is-deployed"]:
34
  make_text_area(
35
  label="Did any of these previous uses result in observations about the social impact of the systems? " + \
36
- "In particular, has there been work outlining the risks and limitations of the system? Provide links and descriptions here:",
37
  key_list=key_pref + ["described-risks"],
38
  help="",
39
  )
@@ -62,7 +62,7 @@ def context_page():
62
  )
63
  if st.session_state.card_dict["context"]["underserved"]["helps-underserved"] == "yes":
64
  make_text_area(
65
- label="Describe how this dataset addresses the needs of underserved communities",
66
  key_list=key_pref+["underserved-description"],
67
  )
68
  else:
 
33
  if "yes" in st.session_state.card_dict["context"]["previous"]["is-deployed"]:
34
  make_text_area(
35
  label="Did any of these previous uses result in observations about the social impact of the systems? " + \
36
+ "In particular, has there been work outlining the risks and limitations of the system? Provide links and descriptions here.",
37
  key_list=key_pref + ["described-risks"],
38
  help="",
39
  )
 
62
  )
63
  if st.session_state.card_dict["context"]["underserved"]["helps-underserved"] == "yes":
64
  make_text_area(
65
+ label="Describe how this dataset addresses the needs of underserved communities.",
66
  key_list=key_pref+["underserved-description"],
67
  )
68
  else:
formatting/format_as_md.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import json
3
+ import pathlib
4
+
5
# Command-line interface: both paths are mandatory and converted to pathlib.Path.
parser = argparse.ArgumentParser(
    description="Format the output of the data card tool as .md for the hub.",
)
parser.add_argument(
    "--input_path",
    "-i",
    type=pathlib.Path,
    required=True,
)
parser.add_argument(
    "--output_path",
    "-o",
    type=pathlib.Path,
    required=True,
)
# NOTE: parsing happens at module level, so it runs whenever this module is
# loaded, not only when it is executed as a script.
args = parser.parse_args()
11
+
12
def read_json_file(json_path: pathlib.Path):
    """Load a JSON file and return the decoded object.

    Args:
        json_path: Location of the JSON file (a path object or a plain string).

    Returns:
        Whatever ``json.load`` produces for the file's content.
    """
    # Decode explicitly as UTF-8 rather than relying on the platform's locale
    # encoding, which would garble or reject non-ASCII text on some systems.
    with open(json_path, "r", encoding="utf-8") as f:
        return json.load(f)
17
+
18
def save_md_file(md_path: pathlib.Path, md_string: str) -> None:
    """Write a string to ``md_path``, replacing any existing file.

    Args:
        md_path: Destination of the markdown file.
        md_string: Markdown content to write.
    """
    # Encode explicitly as UTF-8 so non-ASCII answers are written the same
    # way on every platform instead of depending on the locale encoding.
    with open(md_path, "w", encoding="utf-8") as f:
        f.write(md_string)
22
+
23
def construct_md(data_card_data: dict, text_by_key: dict):
    """Construct the markdown rendering of a data card.

    Walks ``text_by_key`` in its own order and looks up each answer in
    ``data_card_data``. The nesting depth selects the heading level:
    sections become ``##``, subsections ``###`` and questions ``####``.
    Questions whose answer is missing, empty, or ``"N/A"`` are left out.
    ``text_by_key`` is only read, so the same configuration can be reused
    across calls.

    Args:
        data_card_data: Output from the data card tool, nested as
            section -> subsection -> question key -> answer.
        text_by_key: Configuration defined in key_to_question.json. Every
            section and subsection carries a 'section-title' entry; the
            remaining subsection entries map question keys to question text.

    Returns:
        data_card_md_string: Markdown-formatted content.

    Raises:
        KeyError: If a section or subsection of ``text_by_key`` has no
            'section-title' entry.
    """
    title_key = "section-title"
    # Question keys rendered as fenced code blocks -> language tag of the fence.
    fenced_keys = {"paper-bibtext": "", "structure-example": "json"}

    # Collect the pieces and join once instead of repeated string concatenation.
    parts = []
    for main_key, main_content in text_by_key.items():
        parts.append(f"## {main_content[title_key]} \n\n")
        # A section absent from the card is treated like unanswered questions.
        section_answers = data_card_data.get(main_key, {})
        for second_key, second_content in main_content.items():
            # The title is metadata, not a subsection; skip it rather than
            # deleting it so the caller's configuration stays intact.
            if second_key == title_key:
                continue
            parts.append(f"### {second_content[title_key]} \n\n")
            answers = section_answers.get(second_key, {})
            for question_key, question_text in second_content.items():
                if question_key == title_key:
                    continue
                reply = answers.get(question_key, "N/A")
                if not reply or reply == "N/A":
                    continue
                parts.append(f"#### {question_text} \n\n")
                # Exactly one rendering per answer: code-like answers are
                # fenced, lists are space-joined, anything else is emitted
                # as-is.
                if question_key in fenced_keys:
                    fence_lang = fenced_keys[question_key]
                    parts.append(f"```{fence_lang}\n{reply}\n``` \n\n")
                elif isinstance(reply, list):
                    joined = " ".join(map(str, reply))
                    parts.append(f"{joined} \n\n")
                else:
                    parts.append(f"{reply} \n\n")

    data_card_md_string = "".join(parts)
    return data_card_md_string
71
+
72
+
73
if __name__ == "__main__":
    # Load the tool output and the question configuration, render the card,
    # and write the result to the requested location.
    card_answers = read_json_file(args.input_path)
    question_config = read_json_file("key_to_question.json")
    save_md_file(args.output_path, construct_md(card_answers, question_config))