Spaces:

ILAD
/

rhg-script-converter-ui

Sleeping

micahg committed on Feb 29

Commit

72b7374

•

1 Parent(s): b4a732f

spelling edits

Files changed (2) hide show

.gitignore CHANGED Viewed

@@ -1,5 +1,7 @@
 transliterate.py
 output.txt
 *.7z filter=lfs diff=lfs merge=lfs -text
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text

 transliterate.py
 output.txt
+process_folder.py
+process_all_folders.py
 *.7z filter=lfs diff=lfs merge=lfs -text
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text

functions.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import epitran
 import re
 def to_lroh(s):
     s = s.replace('ɖ', 'ḍ')
@@ -104,6 +105,11 @@ def to_roheng(s):
     """
     # step to standardize all nasalized vowels as precomposed characters
     s = re.sub('ã', 'ã', s)
     s = re.sub('ẽ', 'ẽ', s)
     s = re.sub('ĩ', 'ĩ', s)
@@ -120,6 +126,15 @@ def to_roheng(s):
         # diphthongs/glides
         #elif re.search(r'[aãeẽiĩoõuũ]{2}', words[i]):
         words[i] = re.sub(r'([iĩ])([aãeẽoõuũ])', r'\1y\2', words[i])
     s = ' '.join(words)

 import epitran
 import re
+import string
 def to_lroh(s):
     s = s.replace('ɖ', 'ḍ')
     """
     # step to standardize all nasalized vowels as precomposed characters
+    s = re.sub('Ã', 'Ã', s)
+    s = re.sub('Ẽ', 'Ẽ', s)
+    s = re.sub('Ĩ', 'Ĩ', s)
+    s = re.sub('Õ', 'Õ', s)
+    s = re.sub('Ũ', 'Ũ', s)
     s = re.sub('ã', 'ã', s)
     s = re.sub('ẽ', 'ẽ', s)
     s = re.sub('ĩ', 'ĩ', s)
         # diphthongs/glides
         #elif re.search(r'[aãeẽiĩoõuũ]{2}', words[i]):
         words[i] = re.sub(r'([iĩ])([aãeẽoõuũ])', r'\1y\2', words[i])
+        # spelling errors
+        """
+        TODO: replace with dictionary to map
+        """
+        if ''.join(char for char in words[i].strip() if char not in string.punctuation) == 'in':
+            words[i] = words[i].replace('in', 'iin')
+        elif ''.join(char for char in words[i].strip() if char not in string.punctuation) == 'hin':
+            words[i] = words[i].replace('hin', 'hiin')
     s = ' '.join(words)