boris committed on
Commit
7ef7bd9
1 Parent(s): 9ecdd3f

feat(text): support emojis (#154)

Browse files
Files changed (2) hide show
  1. setup.cfg +1 -0
  2. src/dalle_mini/model/text.py +3 -0
setup.cfg CHANGED
@@ -27,6 +27,7 @@ install_requires =
27
  einops
28
  unidecode
29
  ftfy
 
30
  pillow
31
  jax
32
  flax
 
27
  einops
28
  unidecode
29
  ftfy
30
+ emoji
31
  pillow
32
  jax
33
  flax
src/dalle_mini/model/text.py CHANGED
@@ -8,6 +8,7 @@ import random
8
  import re
9
  from pathlib import Path
10
 
 
11
  import ftfy
12
  from huggingface_hub import hf_hub_download
13
  from unidecode import unidecode
@@ -213,6 +214,8 @@ class TextNormalizer:
213
  t = ftfy.fix_text(t)
214
  # fix html
215
  t = fix_html(t)
 
 
216
  # decode and simplify text: see unidecode library
217
  t = unidecode(t)
218
  # lower case
 
8
  import re
9
  from pathlib import Path
10
 
11
+ import emoji
12
  import ftfy
13
  from huggingface_hub import hf_hub_download
14
  from unidecode import unidecode
 
214
  t = ftfy.fix_text(t)
215
  # fix html
216
  t = fix_html(t)
217
+ # decode emojis (would be removed by unidecode)
218
+ t = emoji.demojize(t)
219
  # decode and simplify text: see unidecode library
220
  t = unidecode(t)
221
  # lower case