Spaces:

GroNLP
/

agalma

Running

App Files Community

Mark7549 committed on May 16, 2024

Commit

2605d63

1 Parent(s): 94e7f68

improved display of the library

Browse files

Files changed (3) hide show

app.py +60 -1
lsj_dict.json +2 -2
lsj_dict.py +61 -12

app.py CHANGED Viewed

@@ -46,6 +46,18 @@ active_tab = option_menu(None, ["Nearest neighbours", "Cosine similarity", "3D g
     menu_icon="cast", default_index=0, orientation="horizontal")
 # Nearest neighbours tab
 if active_tab == "Nearest neighbours":
@@ -215,7 +227,54 @@ elif active_tab == "Dictionary":
             # Put text in readable format
             text = format_text(data)
-            st.markdown(text)

     menu_icon="cast", default_index=0, orientation="horizontal")
+# Adding CSS style to remove list-style-type
+st.markdown("""
+<style>
+/* Define a class to remove list-style-type */
+.no-list-style {
+    list-style-type: none;
+}
+</style>
+""", unsafe_allow_html=True)
 # Nearest neighbours tab
 if active_tab == "Nearest neighbours":
             # Put text in readable format
             text = format_text(data)
+            st.markdown(format_text(data), unsafe_allow_html = True)
+            st.markdown("""
+                        <style>
+                        .tab {
+                            display: inline-block;
+                            margin-left: 4em;
+                        }
+                        .tr {
+                            font-weight: bold;
+                        }
+                        .list-class {
+                            list-style-type: none;
+                            margin-top: 1em;
+                        }
+                        .primary-indicator {
+                            font-weight: bold;
+                            font-size: x-large;
+                        }
+                        .secondary-indicator {
+                            font-weight: bold;
+                            font-size: large;
+                        }
+                        .tertiary-indicator {
+                            font-weight: bold;
+                            font-size: medium;
+                        }
+                        .quaternary-indicator {
+                            font-weight: bold;
+                            font-size: medium;
+                        }
+                        .primary-class {
+                            padding-left: 2em;
+                        }
+                        .secondary-class {
+                            padding-left: 4em;
+                        }
+                        .tertiary-class {
+                            padding-left: 6em;
+                        }
+                        .quaternary-class {
+                            padding-left: 8em;
+                        }
+                        </style>
+                        """, unsafe_allow_html=True)

lsj_dict.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:30459c5cf72d067b38e5419903e202bb06cc38426bf8a42b58cc1472d2cc1320
-size 135538892

 version https://git-lfs.github.com/spec/v1
+oid sha256:79d162dda9e22917970d9eb84e108fa180f228bb52fc7e1cf48904f84d4676f5
+size 132311884

lsj_dict.py CHANGED Viewed

@@ -2,7 +2,7 @@ import xml.etree.ElementTree as ET
 from collections import defaultdict
 from autocomplete import load_compressed_word_list
 import json
 def read_xml(file):
     """
@@ -39,33 +39,48 @@ def extract_entry_info(entry):
     definition = ' '.join(entry.itertext()).strip()
     definitions[lemma]['definitions'] = {'tr': definition}
     text = get_descendants_text(entry)
     cleaned_text = prettify_text(text)
     definitions[lemma]['definitions']['text'] = cleaned_text
-    return {'lemma': lemma, 'orthographies': orthographies, 'definitions': definitions[lemma]['definitions']}
 def get_descendants_text(element):
     """
     Get all the text of the descendants of a given element, separating every 'sense' element.
     """
     text = ""
     for child in element:
         if child.tag == 'sense':
             # Add a separator before each 'sense' element
-            text += "[SENSE_SEPARATOR]\n\n"
         if child.tag == 'tr' and element.tag == 'sense':
             # Add [tr] tags around text inside 'tr' tags within 'sense' tags
             if child.text is not None:
                 text += f"<tr>{child.text.strip()}</tr>\n"
         else:
             text += child.text or ""
             text += get_descendants_text(child)
-            text += child.tail or ""
     return text
 def prettify_text(text):
@@ -78,7 +93,8 @@ def prettify_text(text):
     # Prettify each part separately
     prettified_parts = []
-    for part in parts:
         # Remove leading and trailing whitespace and join lines with a space
         cleaned_part = ' '.join(line.strip() for line in part.split('\n') if line.strip())
         prettified_parts.append(cleaned_part)
@@ -108,13 +124,43 @@ def format_text(data):
     text = data['definitions']['text']
     # Change <tr> tags to bold
-    text = text.replace("<tr>", "**").replace("</tr>", "**").replace(",", ", ").replace(";", "; ").replace(":", ": ").replace("(", " (").replace(")", ") ").replace("[", " [").replace("]", "] ").replace(" ,", ", ").replace(" ; ", "; ").replace(" : ", ": ").replace(" ." , ". ")
-    # Change [SENSE_SEPARATOR] to integers
-    for i in range(len(text.split("[SENSE_SEPARATOR]"))):
-        text = text.replace("[SENSE_SEPARATOR]", f"{i+1}.")
-    return text
 def main():
@@ -163,3 +209,6 @@ def print_test(lemma_dict):
 if __name__ == "__main__":
     main()

 from collections import defaultdict
 from autocomplete import load_compressed_word_list
 import json
+import streamlit as st
 def read_xml(file):
     """
     definition = ' '.join(entry.itertext()).strip()
     definitions[lemma]['definitions'] = {'tr': definition}
     text = get_descendants_text(entry)
     cleaned_text = prettify_text(text)
     definitions[lemma]['definitions']['text'] = cleaned_text
+    return {'lemma': lemma, 'orthographies': orthographies, 'definitions': definitions[lemma]['definitions']}
 def get_descendants_text(element):
     """
     Get all the text of the descendants of a given element, separating every 'sense' element.
     """
     text = ""
+    level_indicators = [
+                        'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X',
+                        '1', '2', '3', '4', '5', '6', '7', '8', '9', '10',
+                        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
+                        'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+                        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
+                        'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'
+                    ]
     for child in element:
         if child.tag == 'sense':
             # Add a separator before each 'sense' element
+            text += f"[SENSE_SEPARATOR]\n\n"
         if child.tag == 'tr' and element.tag == 'sense':
             # Add [tr] tags around text inside 'tr' tags within 'sense' tags
             if child.text is not None:
                 text += f"<tr>{child.text.strip()}</tr>\n"
+                text += child.tail
         else:
+            if child.get('n') and len(child.get('n')) <= 2:
+                text += f"{child.get('n')}. "
             text += child.text or ""
             text += get_descendants_text(child)
     return text
 def prettify_text(text):
     # Prettify each part separately
     prettified_parts = []
+    for part in parts:
         # Remove leading and trailing whitespace and join lines with a space
         cleaned_part = ' '.join(line.strip() for line in part.split('\n') if line.strip())
         prettified_parts.append(cleaned_part)
     text = data['definitions']['text']
     # Change <tr> tags to bold
+    text = text.replace("<tr>", "<span class='tr'> ").replace("</tr>", "</span>").replace(",", ", ").replace(";", "; ").replace(":", ": ").replace("(", " (").replace(")", ") ").replace("[", " [").replace("]", "] ").replace(" ,", ", ").replace(" ; ", "; ").replace(" : ", ": ").replace(" ." , ". ")
+    formatted_text = []
+    primary_indicators = [
+        "A", "B", "C", "D", "E", "F", "G", "H", "I", "J"
+    ]
+    secondary_indicators = [
+        "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X"
+    ]
+    tertiary_indicators = [
+        "2", "3", "4", "5", "6", "7", "8", "9", "10"
+    ]
+    quaternary_indicators = [
+        "a", "b", "c", "d", "e", "f", "g", "h", "i", "j",
+        "k", "l", "m", "n", "o", "p", "q", "r", "s", "t",
+        "u", "v", "w", "x", "y", "z"
+    ]
+    for text_part in text.split("[SENSE_SEPARATOR]"):
+        level = text_part.split(".")[0].strip()
+        text_part = text_part.replace(level + ".", "")
+        if level:
+            if level in secondary_indicators:
+                formatted_text.append(f"<div class='list-class secondary-class'><span class='secondary-indicator'>{level}.</span> {text_part.replace('[SENSE_SEPARATOR]', '')} </div>")
+            elif level in tertiary_indicators:
+                formatted_text.append(f"<div class='list-class tertiary-class'> <span class='tertiary-indicator'>{level}.</span> {text_part.replace('[SENSE_SEPARATOR]', '')} </div>")
+            elif level in quaternary_indicators:
+                formatted_text.append(f"<div class='list-class quaternary-class'> <span class='quaternary-indicator'>{level}.</span> {text_part.replace('[SENSE_SEPARATOR]', '')} </div> ")
+            elif level in primary_indicators:
+                formatted_text.append(f"<div class='list-class primary-class'> <span class='primary-indicator'>{level}.</span> {text_part.replace('[SENSE_SEPARATOR]', '')} </div>")
+    return '\n'.join(formatted_text)
 def main():
 if __name__ == "__main__":
     main()