gyrojeff committed on
Commit
c34db03
1 Parent(s): 2046da1

feat: enable word list cache

Browse files
Files changed (2) hide show
  1. .gitignore +1 -0
  2. font_dataset/text.py +8 -3
.gitignore CHANGED
@@ -1,3 +1,4 @@
 
1
  dataset
2
  lyrics_corpus/cache
3
 
 
1
+ wordlist.txt
2
  dataset
3
  lyrics_corpus/cache
4
 
font_dataset/text.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import random
2
  import requests
3
  from .font import DSFont
@@ -135,9 +136,13 @@ class RandomCorpusGeneratorWithEnglish(CommonCorpusGenerator):
135
  def __init__(
136
  self, char_set: str, prob: float = 0.3, when_length_greater_than: int = 10
137
  ):
138
- word_site = "https://www.mit.edu/~ecprice/wordlist.10000"
139
- response = requests.get(word_site)
140
- self.english_words = response.text.splitlines()
 
 
 
 
141
  self.char_set = char_set
142
  self.prob = prob
143
  self.when_length_greater_than = when_length_greater_than
 
1
+ import os
2
  import random
3
  import requests
4
  from .font import DSFont
 
136
  def __init__(
137
  self, char_set: str, prob: float = 0.3, when_length_greater_than: int = 10
138
  ):
139
+ if os.path.exists("wordlist.txt"):
140
+ with open("wordlist.txt", "r", encoding="utf-8") as f:
141
+ self.english_words = f.read().splitlines()
142
+ else:
143
+ word_site = "https://www.mit.edu/~ecprice/wordlist.10000"
144
+ response = requests.get(word_site)
145
+ self.english_words = response.text.splitlines()
146
  self.char_set = char_set
147
  self.prob = prob
148
  self.when_length_greater_than = when_length_greater_than