Alexis Palmer committed on
Commit 29d61ac
1 Parent(s): b965b6e

Scramble for Aymara, first version

Files changed (3)
  1. app.py +88 -0
  2. aymara.easy.filtered +71 -0
  3. util.py +79 -0
app.py ADDED
@@ -0,0 +1,88 @@
+ import gradio as gr
+ import util
+ import re
+ import random
+
+ ### load and prepare corpus
+ #corpus = util.load_raw_text(corpus_directory="map_avenue")
+
+ corpus = util.load_single_raw_text_file("aymara.easy.filtered")
+
+ #corpus = corpus.lower()
+ #word_regex = r"[a-z]+"
+ #def tokenize(text: str):
+ #    return re.findall(word_regex, text)
+
+ #words = tokenize(corpus)
+ words = corpus.split()
+ print(words)
+
+
+ lexicon = set()
+ for word in words:
+     lexicon.add(word)
+
+ filtered_lexicon = set()
+
+ for word in lexicon:
+     filtered_lexicon.add(word)
+     # if 4 <= len(word) <= 6:
+     #     filtered_lexicon.add(word)
+
+ print(len(filtered_lexicon))
+
+ def random_scramble(lexicon: set):
+     lexicon = list(lexicon)
+
+     word = random.choice(lexicon)
+
+     # Turn the word into a list of characters
+     word_chars = list(word)
+
+     # Shuffle those characters
+     random.shuffle(word_chars)
+
+     # Re-join the characters into a string
+     shuffled = ''.join(word_chars)
+
+     return {'shuffled': shuffled, 'original': word}
+
+
+
+ def scrambler_game(current_word, guess: str):
+     """
+     If `guess` is the correct word, return 'Correct' and pick a new word. Otherwise, return 'Incorrect'
+     Returns (correct_label, scrambled_word, current_word)
+     """
+     if guess == current_word['original']:
+         current_word = random_scramble(filtered_lexicon)
+         return ('😀 ¡Correcto! 😀', current_word['shuffled'], current_word)
+     else:
+         return ('Incorrecto 😕', current_word['shuffled'], current_word)
+
+
+ def new_word():
+     current_word = random_scramble(filtered_lexicon)
+     return ('', current_word['shuffled'], current_word)
+
+
+ with gr.Blocks(theme=gr.themes.Soft(), title="Aru Thaqana") as unscramble:
+     # Start with some initial word
+     current_word = gr.State(random_scramble(filtered_lexicon))
+
+     gr.Markdown("# Aru Thaqana")
+
+     # Notice how we set the initial value based on the State
+     scrambled_textbox = gr.Textbox(label="Crucigrama", interactive=False, value=current_word.value['shuffled'])
+
+     guess_textbox = gr.Textbox(label="Adivinar - Adivina la palabra y luego aprieta en 'enviar'")
+     guess_button = gr.Button(value="Enviar")
+
+     new_word_button = gr.Button(value="Nueva Palabra")
+
+     output_textbox = gr.Textbox(label="Resultado", interactive=False)
+
+     guess_button.click(fn=scrambler_game, inputs=[current_word, guess_textbox], outputs=[output_textbox, scrambled_textbox, current_word])
+     new_word_button.click(fn=new_word, inputs=[], outputs=[output_textbox, scrambled_textbox, current_word])
+
+ unscramble.launch(share=True)
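
For quick local testing, a minimal sketch of the game's state round-trip outside Gradio: random_scramble is copied verbatim from app.py above so the snippet runs on its own (importing app.py directly would also call launch()), and the three-word demo_lexicon is a made-up stand-in for the filtered lexicon.

    import random

    def random_scramble(lexicon: set):
        lexicon = list(lexicon)
        word = random.choice(lexicon)
        word_chars = list(word)
        random.shuffle(word_chars)
        return {'shuffled': ''.join(word_chars), 'original': word}

    # Stand-in lexicon for illustration only; the app draws from aymara.easy.filtered instead
    demo_lexicon = {'waka', 'paya', 'maya'}

    state = random_scramble(demo_lexicon)
    print('Puzzle:', state['shuffled'])

    # A wrong guess keeps the current word; a correct guess draws a new one,
    # matching the three values scrambler_game hands back to its Gradio outputs.
    for guess in ['???', state['original']]:
        if guess == state['original']:
            state = random_scramble(demo_lexicon)
            print('Correct! Next puzzle:', state['shuffled'])
        else:
            print('Incorrect, try again:', state['shuffled'])
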
aymara.easy.filtered ADDED
@@ -0,0 +1,71 @@
+
+ waka
+ kayunaka
+ paya
+ maya
+ qunqurinaka
+ ñik'uta
+ laka
+ kimsa
+ wank'u
+ kallachi
+ Uqi
+ tiyu
+ yuxch'a
+ anu
+ Aymara
+ papilanku
+ kusikusi
+ qaqilu
+ paqallqu
+ awki
+ ispillu
+ achaku
+ phisqa
+ puraka
+ laxra
+ chu'xña
+ kunka
+ chara
+ ampara
+ warmi
+ achila
+ tullqa
+ jamach'i
+ patu
+ puma
+ mallku
+ p'iqi
+ nayra
+ tunka
+ Jinchu
+ kimsaqallqu
+ Ch'umpi
+ larama
+ wallpa
+ tayka
+ awch'i
+ Ch'ara
+ Quri
+ pusi
+ phisi
+ khuchi
+ mujlli
+ asiru
+ kullaka
+ Janq'u
+ jilata
+ suxta
+ ch'usa
+ Ch'uri
+ jikhani
+ awicha
+ chacha
+ uwija
+ nasa
+ ñuñu
+ k'ulli
+ tiya
+ chawlla
+ wila
+ q'illu
util.py ADDED
@@ -0,0 +1,79 @@
+ import os
+ import re
+ import unicodedata
+
+ def strip_accents(text: str) -> str:
+     """Removes accents from text."""
+     return ''.join(c for c in unicodedata.normalize('NFD', text)
+                    if unicodedata.category(c) != 'Mn')
+
+
+ def load_raw_text(corpus_directory: str, file_names=None) -> str:
+     """Loads all the text files in a directory into one large string"""
+     corpus = ""
+
+     for file_name in os.listdir(corpus_directory):
+         # Read the file as a string
+         file_path = os.path.join(corpus_directory, file_name)
+         if os.path.isdir(file_path):
+             continue
+
+         # Make sure we only read text files
+         if ".txt" not in file_name:
+             continue
+
+         with open(file_path, 'r') as file:
+             file_contents = file.read()
+             corpus += (file_contents + "\n")
+     return corpus
+
+ def load_single_raw_text_file(file_name):
+     """Loads a single text file into one large string"""
+
+     corpus = ""
+     with open(file_name, 'r') as file:
+         file_contents = file.read()
+         corpus += (file_contents + "\n")
+
+     return corpus
+
+
+ word_regex = r"[\w|\']+"
+ def tokenize(text):
+     return re.findall(word_regex, text)
+
+
+ def preprocess(text):
+     """Tokenizes and processes text which is already separated by spaces into words. Designed for English punctuation."""
+     text = strip_accents(text)
+     text = text.lower()
+
+     tokens = text.split(" ")
+
+     tokens_filtered = []
+     for token in tokens:
+         # Skip any tokens with special characters
+         if re.match(r"[\w|\']+|[\.|\,|\?|\!]", token):
+             tokens_filtered.append(token)
+     return tokens_filtered
+
+
+ def pad(text: list, num_padding: int):
+     """Pads the given text, as a list of strings, with <s> characters between sentences."""
+     padded_text = []
+
+     # Add initial padding to the first sentence
+     for _ in range(num_padding):
+         padded_text.append("<s>")
+
+     for word in text:
+         padded_text.append(word)
+
+         # Every time we see an end punctuation mark, add <s> tokens after it
+         # REPLACE IF YOUR LANGUAGE USES DIFFERENT END PUNCTUATION
+         if word in [".", "?", "!"]:
+             for _ in range(num_padding):
+                 padded_text.append("<s>")
+
+
+     return padded_text
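
The tokenize/preprocess/pad helpers are not exercised by app.py in this version (that code path is commented out), but as a reference, here is a small sketch of how they compose. The example input just strings together number words from aymara.easy.filtered and is illustrative only.

    import util

    tokens = util.tokenize("Maya paya kimsa. Pusi phisqa suxta?")
    print(tokens)
    # ['Maya', 'paya', 'kimsa', 'Pusi', 'phisqa', 'suxta']  (punctuation is dropped by the regex)

    # pad() only inserts <s> markers when end punctuation appears as its own token
    padded = util.pad(['maya', 'paya', 'kimsa', '.'], num_padding=2)
    print(padded)
    # ['<s>', '<s>', 'maya', 'paya', 'kimsa', '.', '<s>', '<s>']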