fixed spaces in dictionary text display
Browse files - lsj_dict.py +4 -3
lsj_dict.py
CHANGED
@@ -28,8 +28,8 @@ def extract_entry_info(entry):
|
|
28 |
"""
|
29 |
definitions = defaultdict(dict)
|
30 |
|
31 |
-
# Save the lemma in the dictionary
|
32 |
-
lemma = entry.get('key')
|
33 |
|
34 |
# Save the orthographies in the dictionary
|
35 |
orthographies = [orth.text for orth in entry.findall('orth')]
|
@@ -108,7 +108,7 @@ def format_text(data):
|
|
108 |
text = data['definitions']['text']
|
109 |
|
110 |
# Change <tr> tags to bold
|
111 |
-
text = text.replace("<tr>", "**").replace("</tr>", "**")
|
112 |
|
113 |
# Change [SENSE_SEPARATOR] to integers
|
114 |
for i in range(len(text.split("[SENSE_SEPARATOR]"))):
|
@@ -143,6 +143,7 @@ def main():
|
|
143 |
for word, info in xml_info.items():
|
144 |
# Merge dictionaries, assuming word is unique across all files
|
145 |
merged_info.setdefault(word, {}).update(info)
|
|
|
146 |
|
147 |
# Store merged dictionaries as .json file with pretty print
|
148 |
with open("lsj_dict.json", "w", encoding="utf-8") as file:
|
|
|
28 |
"""
|
29 |
definitions = defaultdict(dict)
|
30 |
|
31 |
+
# Save the lemma in the dictionary and remove digits
|
32 |
+
lemma = ''.join([i for i in entry.get('key') if not i.isdigit()])
|
33 |
|
34 |
# Save the orthographies in the dictionary
|
35 |
orthographies = [orth.text for orth in entry.findall('orth')]
|
|
|
108 |
text = data['definitions']['text']
|
109 |
|
110 |
# Change <tr> tags to bold
|
111 |
+
text = text.replace("<tr>", "**").replace("</tr>", "**").replace(",", ", ").replace(";", "; ").replace(":", ": ").replace("(", " (").replace(")", ") ").replace("[", " [").replace("]", "] ").replace(" ,", ", ").replace(" ; ", "; ").replace(" : ", ": ").replace(" ." , ". ")
|
112 |
|
113 |
# Change [SENSE_SEPARATOR] to integers
|
114 |
for i in range(len(text.split("[SENSE_SEPARATOR]"))):
|
|
|
143 |
for word, info in xml_info.items():
|
144 |
# Merge dictionaries, assuming word is unique across all files
|
145 |
merged_info.setdefault(word, {}).update(info)
|
146 |
+
|
147 |
|
148 |
# Store merged dictionaries as .json file with pretty print
|
149 |
with open("lsj_dict.json", "w", encoding="utf-8") as file:
|