boris committed on
Commit
a09ea25
1 Parent(s): df2dbc7

feat: add ftfy

Browse files
Files changed (1) hide show
  1. dalle_mini/text.py +5 -1
dalle_mini/text.py CHANGED
@@ -5,8 +5,9 @@ Utilities for processing text.
5
  import requests
6
  from pathlib import Path
7
  from unidecode import unidecode
8
- import re, math, random, html
9
 
 
 
10
 
11
  WIKI_STATS_URL = "https://github.com/borisdayma/wikipedia-word-frequency/raw/feat-update/results/enwiki-20210820-words-frequency.txt"
12
  WIKI_STATS_LOCAL = Path(WIKI_STATS_URL).parts[-1]
@@ -220,6 +221,9 @@ class TextNormalizer:
220
  self._hashtag_processor = HashtagProcessor()
221
 
222
  def __call__(self, t, clip=False):
 
 
 
223
  # fix html
224
  t = fix_html(t)
225
  if not clip:
 
5
  import requests
6
  from pathlib import Path
7
  from unidecode import unidecode
 
8
 
9
+ import re, math, random, html
10
+ import ftfy
11
 
12
  WIKI_STATS_URL = "https://github.com/borisdayma/wikipedia-word-frequency/raw/feat-update/results/enwiki-20210820-words-frequency.txt"
13
  WIKI_STATS_LOCAL = Path(WIKI_STATS_URL).parts[-1]
 
221
  self._hashtag_processor = HashtagProcessor()
222
 
223
  def __call__(self, t, clip=False):
224
+
225
+ # fix some characters
226
+ t = ftfy.fix_text(t)
227
  # fix html
228
  t = fix_html(t)
229
  if not clip: