feat: enable word list cache
Browse files
- .gitignore +1 -0
- font_dataset/text.py +8 -3
.gitignore
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
dataset
|
2 |
lyrics_corpus/cache
|
3 |
|
|
|
1 |
+
wordlist.txt
|
2 |
dataset
|
3 |
lyrics_corpus/cache
|
4 |
|
font_dataset/text.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import random
|
2 |
import requests
|
3 |
from .font import DSFont
|
@@ -135,9 +136,13 @@ class RandomCorpusGeneratorWithEnglish(CommonCorpusGenerator):
|
|
135 |
def __init__(
|
136 |
self, char_set: str, prob: float = 0.3, when_length_greater_than: int = 10
|
137 |
):
|
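138 |
-
word_site = "https://www.mit.edu/~ecprice/wordlist.10000"
|
139 |
-
response = requests.get(word_site)
|
140 |
-
self.english_words = response.text.splitlines()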
|
|
|
|
|
|
|
|
|
141 |
self.char_set = char_set
|
142 |
self.prob = prob
|
143 |
self.when_length_greater_than = when_length_greater_than
|
|
|
1 |
+
import os
|
2 |
import random
|
3 |
import requests
|
4 |
from .font import DSFont
|
|
|
136 |
def __init__(
|
137 |
self, char_set: str, prob: float = 0.3, when_length_greater_than: int = 10
|
138 |
):
|
139 |
+
if os.path.exists("wordlist.txt"):
|
140 |
+
with open("wordlist.txt", "r", encoding="utf-8") as f:
|
141 |
+
self.english_words = f.read().splitlines()
|
142 |
+
else:
|
143 |
+
word_site = "https://www.mit.edu/~ecprice/wordlist.10000"
|
144 |
+
response = requests.get(word_site)
|
145 |
+
self.english_words = response.text.splitlines()
|
146 |
self.char_set = char_set
|
147 |
self.prob = prob
|
148 |
self.when_length_greater_than = when_length_greater_than
|