feat(text): support emojis (#154)
Browse files
- setup.cfg +1 -0
- src/dalle_mini/model/text.py +3 -0
setup.cfg
CHANGED
@@ -27,6 +27,7 @@ install_requires =
|
|
27 |
einops
|
28 |
unidecode
|
29 |
ftfy
|
|
|
30 |
pillow
|
31 |
jax
|
32 |
flax
|
|
|
27 |
einops
|
28 |
unidecode
|
29 |
ftfy
|
30 |
+
emoji
|
31 |
pillow
|
32 |
jax
|
33 |
flax
|
src/dalle_mini/model/text.py
CHANGED
@@ -8,6 +8,7 @@ import random
|
|
8 |
import re
|
9 |
from pathlib import Path
|
10 |
|
|
|
11 |
import ftfy
|
12 |
from huggingface_hub import hf_hub_download
|
13 |
from unidecode import unidecode
|
@@ -213,6 +214,8 @@ class TextNormalizer:
|
|
213 |
t = ftfy.fix_text(t)
|
214 |
# fix html
|
215 |
t = fix_html(t)
|
|
|
|
|
216 |
# decode and simplify text: see unidecode library
|
217 |
t = unidecode(t)
|
218 |
# lower case
|
|
|
8 |
import re
|
9 |
from pathlib import Path
|
10 |
|
11 |
+
import emoji
|
12 |
import ftfy
|
13 |
from huggingface_hub import hf_hub_download
|
14 |
from unidecode import unidecode
|
|
|
214 |
t = ftfy.fix_text(t)
|
215 |
# fix html
|
216 |
t = fix_html(t)
|
217 |
+
# decode emojis (would be removed by unidecode)
|
218 |
+
t = emoji.demojize(t)
|
219 |
# decode and simplify text: see unidecode library
|
220 |
t = unidecode(t)
|
221 |
# lower case
|