Spaces:
Sleeping
Sleeping
Alexis Palmer
committed on
Commit
•
29d61ac
1
Parent(s):
b965b6e
Scramble para Aymara, first version
Browse files- app.py +88 -0
- aymara.easy.filtered +71 -0
- util.py +79 -0
app.py
ADDED
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
import util
import re
import random

### load and prepare corpus
# Alternative: load a whole directory of .txt files instead of one file.
#corpus = util.load_raw_text(corpus_directory="map_avenue")

corpus = util.load_single_raw_text_file("aymara.easy.filtered")

# The corpus is one word per line, so whitespace splitting is sufficient.
words = corpus.split()

# Deduplicate into the game lexicon (was a manual add-loop).
lexicon = set(words)

# Currently every word is kept; re-enable the commented filter to
# restrict the game to medium-length words.
filtered_lexicon = set(lexicon)
# filtered_lexicon = {word for word in lexicon if 4 <= len(word) <= 6}

# Report the lexicon size only — printing the full word list was debug noise.
print(len(filtered_lexicon))
def random_scramble(lexicon: set):
    """Pick a random word from *lexicon* and return it with its letters shuffled.

    Args:
        lexicon: non-empty set of candidate words.

    Returns:
        dict with keys 'shuffled' (the scrambled letters) and 'original'
        (the word the player must guess).
    """
    word = random.choice(list(lexicon))

    # Turn the word into a list of characters and shuffle them.
    word_chars = list(word)
    random.shuffle(word_chars)
    shuffled = ''.join(word_chars)

    # random.shuffle can produce the original ordering, which would show the
    # answer directly. Retry whenever a different ordering is possible
    # (i.e. the word has at least two distinct characters).
    while shuffled == word and len(set(word)) > 1:
        random.shuffle(word_chars)
        shuffled = ''.join(word_chars)

    return {'shuffled': shuffled, 'original': word}
def scrambler_game(current_word, guess: str):
    """
    If `guess` is the correct word, return 'Correct' and pick a new word.
    Otherwise, return 'Incorrect' and keep the current word.

    Args:
        current_word: dict with 'shuffled' and 'original' keys (the game state).
        guess: the player's answer from the textbox.

    Returns (correct_label, scrambled_word, current_word)
    """
    # Strip surrounding whitespace so stray spaces from typing or
    # copy/paste don't turn a right answer into a wrong one.
    if guess.strip() == current_word['original']:
        current_word = random_scramble(filtered_lexicon)
        return ('😀 ¡Correcto! 😀', current_word['shuffled'], current_word)
    else:
        return ('Incorrecto 😕', current_word['shuffled'], current_word)
def new_word():
    """Draw a fresh scrambled word and clear the result textbox.

    Returns (result_text, scrambled_word, state) matching the Gradio outputs.
    """
    state = random_scramble(filtered_lexicon)
    return ('', state['shuffled'], state)
with gr.Blocks(theme=gr.themes.Soft(), title="Aru Thaqana") as unscramble:
    # Per-session game state, seeded with an initial scrambled word.
    current_word = gr.State(random_scramble(filtered_lexicon))

    gr.Markdown("# Aru Thaqana")

    # The textbox's initial value is taken from the State created above.
    scrambled_textbox = gr.Textbox(
        label="Crucigrama",
        interactive=False,
        value=current_word.value['shuffled'],
    )

    guess_textbox = gr.Textbox(label="Adivinar - Adivina la palabra y luego aprieta en 'enviar'")
    guess_button = gr.Button(value="Enviar")

    new_word_button = gr.Button(value="Nueva Palabra")

    output_textbox = gr.Textbox(label="Resultado", interactive=False)

    # Both callbacks return (result_text, scrambled_word, state).
    guess_button.click(
        fn=scrambler_game,
        inputs=[current_word, guess_textbox],
        outputs=[output_textbox, scrambled_textbox, current_word],
    )
    new_word_button.click(
        fn=new_word,
        inputs=[],
        outputs=[output_textbox, scrambled_textbox, current_word],
    )

unscramble.launch(share=True)
aymara.easy.filtered
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
waka
|
3 |
+
kayunaka
|
4 |
+
paya
|
5 |
+
maya
|
6 |
+
qunqurinaka
|
7 |
+
ñik'uta
|
8 |
+
laka
|
9 |
+
kimsa
|
10 |
+
wank'u
|
11 |
+
kallachi
|
12 |
+
Uqi
|
13 |
+
tiyu
|
14 |
+
yuxch'a
|
15 |
+
anu
|
16 |
+
Aymara
|
17 |
+
papilanku
|
18 |
+
kusikusi
|
19 |
+
qaqilu
|
20 |
+
paqallqu
|
21 |
+
awki
|
22 |
+
ispillu
|
23 |
+
achaku
|
24 |
+
phisqa
|
25 |
+
puraka
|
26 |
+
laxra
|
27 |
+
chu'xña
|
28 |
+
kunka
|
29 |
+
chara
|
30 |
+
ampara
|
31 |
+
warmi
|
32 |
+
achila
|
33 |
+
tullqa
|
34 |
+
jamach'i
|
35 |
+
patu
|
36 |
+
puma
|
37 |
+
mallku
|
38 |
+
p'iqi
|
39 |
+
nayra
|
40 |
+
tunka
|
41 |
+
Jinchu
|
42 |
+
kimsaqallqu
|
43 |
+
Ch'umpi
|
44 |
+
larama
|
45 |
+
wallpa
|
46 |
+
tayka
|
47 |
+
awch'i
|
48 |
+
Ch'ara
|
49 |
+
Quri
|
50 |
+
pusi
|
51 |
+
phisi
|
52 |
+
khuchi
|
53 |
+
mujlli
|
54 |
+
asiru
|
55 |
+
kullaka
|
56 |
+
Janq'u
|
57 |
+
jilata
|
58 |
+
suxta
|
59 |
+
ch'usa
|
60 |
+
Ch'uri
|
61 |
+
jikhani
|
62 |
+
awicha
|
63 |
+
chacha
|
64 |
+
uwija
|
65 |
+
nasa
|
66 |
+
ñuñu
|
67 |
+
k'ulli
|
68 |
+
tiya
|
69 |
+
chawlla
|
70 |
+
wila
|
71 |
+
q'illu
|
util.py
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import re
|
3 |
+
import unicodedata
|
4 |
+
|
5 |
+
def strip_accents(text: str) -> str:
    """Removes accents from text."""
    # Decompose precomposed characters (e.g. é -> e + combining accent),
    # then drop every combining mark (Unicode category 'Mn').
    decomposed = unicodedata.normalize('NFD', text)
    kept = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(kept)
def load_raw_text(corpus_directory: str, file_names=None) -> str:
    """Loads all the text files in a directory into one large string.

    Args:
        corpus_directory: directory to scan for .txt files.
        file_names: optional collection of file names; when given, only
            those files are read (this parameter was previously ignored).

    Returns:
        The concatenated file contents, with a newline after each file.
    """
    pieces = []

    for file_name in os.listdir(corpus_directory):
        # Honour an explicit file list when the caller provides one.
        if file_names is not None and file_name not in file_names:
            continue

        file_path = os.path.join(corpus_directory, file_name)
        if os.path.isdir(file_path):
            continue

        # Only read text files — check the suffix, not "anywhere in the name".
        if not file_name.endswith(".txt"):
            continue

        # Explicit encoding: corpora contain non-ASCII characters, so
        # don't rely on the platform default.
        with open(file_path, 'r', encoding='utf-8') as file:
            pieces.append(file.read() + "\n")

    # join() instead of repeated += (quadratic string building).
    return "".join(pieces)
def load_single_raw_text_file(file_name):
    """Loads a single text file into one large string.

    Args:
        file_name: path of the text file to read.

    Returns:
        The file contents with a trailing newline appended.
    """
    # Explicit encoding: the Aymara corpus contains characters such as
    # ñ and ' that break under non-UTF-8 platform defaults.
    with open(file_name, 'r', encoding='utf-8') as file:
        corpus = file.read() + "\n"

    return corpus
# Word characters plus apostrophe (Aymara uses ' for ejective consonants).
# NOTE: the previous pattern r"[\w|\']+" put a literal '|' inside the
# character class, so pipe characters were wrongly treated as word letters.
word_regex = r"[\w']+"


def tokenize(text):
    """Split *text* into word tokens (word characters and apostrophes)."""
    return re.findall(word_regex, text)
def preprocess(text):
    """Tokenizes and processes text which is already separated by spaces into words. Designed for English punctuation.

    Strips accents, lowercases, splits on single spaces, and keeps tokens
    that start with a word/apostrophe character or an end punctuation mark.
    """
    text = strip_accents(text)
    text = text.lower()

    tokens = text.split(" ")

    tokens_filtered = []
    for token in tokens:
        # Skip any tokens with special characters.
        # Fixed: the old pattern had literal '|' inside its character
        # classes, so pipes counted as word characters.
        # NOTE(review): re.match only anchors at the start, so a token such
        # as "word?!" still passes; switch to re.fullmatch if tokens with
        # trailing special characters should really be skipped — confirm
        # intended behavior first.
        if re.match(r"[\w']+|[.,?!]", token):
            tokens_filtered.append(token)
    return tokens_filtered
def pad(text: list, num_padding: int):
    """Pads the given text, as a list of strings, with <s> characters between sentences."""
    # Reusable run of <s> sentinel tokens.
    sentinel = ["<s>"] * num_padding

    # Initial padding before the first sentence.
    padded_text = list(sentinel)

    for token in text:
        padded_text.append(token)

        # Every end punctuation mark is followed by a fresh run of <s>.
        # REPLACE IF YOUR LANGUAGE USES DIFFERENT END PUNCTUATION
        if token in (".", "?", "!"):
            padded_text.extend(sentinel)

    return padded_text