Mark7549 committed on
Commit
6cd05ce
1 Parent(s): 8fb441e

fixed spaces in dictionary text display

Browse files
Files changed (1) hide show
  1. lsj_dict.py +4 -3
lsj_dict.py CHANGED
@@ -28,8 +28,8 @@ def extract_entry_info(entry):
28
  """
29
  definitions = defaultdict(dict)
30
 
31
- # Save the lemma in the dictionary
32
- lemma = entry.get('key')
33
 
34
  # Save the orthographies in the dictionary
35
  orthographies = [orth.text for orth in entry.findall('orth')]
@@ -108,7 +108,7 @@ def format_text(data):
108
  text = data['definitions']['text']
109
 
110
  # Change <tr> tags to bold
111
- text = text.replace("<tr>", "**").replace("</tr>", "**")
112
 
113
  # Change [SENSE_SEPARATOR] to integers
114
  for i in range(len(text.split("[SENSE_SEPARATOR]"))):
@@ -143,6 +143,7 @@ def main():
143
  for word, info in xml_info.items():
144
  # Merge dictionaries, assuming word is unique across all files
145
  merged_info.setdefault(word, {}).update(info)
 
146
 
147
  # Store merged dictionaries as .json file with pretty print
148
  with open("lsj_dict.json", "w", encoding="utf-8") as file:
 
28
  """
29
  definitions = defaultdict(dict)
30
 
31
+ # Save the lemma in the dictionary and remove digits
32
+ lemma = ''.join([i for i in entry.get('key') if not i.isdigit()])
33
 
34
  # Save the orthographies in the dictionary
35
  orthographies = [orth.text for orth in entry.findall('orth')]
 
108
  text = data['definitions']['text']
109
 
110
  # Change <tr> tags to bold
111
+ text = text.replace("<tr>", "**").replace("</tr>", "**").replace(",", ", ").replace(";", "; ").replace(":", ": ").replace("(", " (").replace(")", ") ").replace("[", " [").replace("]", "] ").replace(" ,", ", ").replace(" ; ", "; ").replace(" : ", ": ").replace(" ." , ". ")
112
 
113
  # Change [SENSE_SEPARATOR] to integers
114
  for i in range(len(text.split("[SENSE_SEPARATOR]"))):
 
143
  for word, info in xml_info.items():
144
  # Merge dictionaries, assuming word is unique across all files
145
  merged_info.setdefault(word, {}).update(info)
146
+
147
 
148
  # Store merged dictionaries as .json file with pretty print
149
  with open("lsj_dict.json", "w", encoding="utf-8") as file: